// Anime4K_Upscale_GAN_x3_L
// Ported from https://github.com/bloc97/Anime4K/blob/8e39551ce96ed172605c89b7dd8be855b5502cc9/glsl/Upscale/Anime4K_Upscale_GAN_x3_L.glsl

//!MAGPIE EFFECT
//!VERSION 4

// Inputs in parentheses are sampled only once
// INPUT -> tf, tf1, tf2
// tf, tf1, tf2 -> 1_tf, 3_tf, 3_tf1, 3_tf2
// 3_tf, 3_tf1, 3_tf2, (1_tf) -> 4_tf, 6_tf, 6_tf1, 6_tf2
// 6_tf, 6_tf1, 6_tf2, (1_tf), (4_tf) -> 7_tf, 9_tf, 9_tf1, 9_tf2
// 9_tf, 9_tf1, 9_tf2, (1_tf), (4_tf), (7_tf) -> 11_tf, 10_tf, 12_tf, 12_tf1, 12_tf2
// 12_tf, 12_tf1, 12_tf2,  11_tf, (1_tf), (4_tf), (7_tf), (10_tf) -> 0ups, 0ups1, 0ups2
// 0ups, 0ups1, 0ups2 -> 1ups, 1ups1
// (INPUT), 1ups, 1ups1 -> OUTPUT

// Resource declarations. Each resource is introduced by a //!TEXTURE or
// //!SAMPLER directive, optionally followed by size/format/filter directives,
// and then the HLSL declaration itself.

// Source image. It has no size or format directives; Pass 1 lists it under //!IN.
//!TEXTURE
Texture2D INPUT;

// Final result: three times the input width and height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
Texture2D OUTPUT;

// Point-filtered sampler. Pass 1 uses it for its Gather* calls and Pass 2 for
// its SampleLevel calls.
//!SAMPLER
//!FILTER POINT
SamplerState sam;

// Linear-filtered sampler. It is not referenced by the passes visible in this
// part of the file; presumably a later pass uses it (TODO confirm).
//!SAMPLER
//!FILTER LINEAR
SamplerState sam1;

// tex1..tex11 are intermediate textures that share one layout: input
// resolution, four 16-bit float channels per texel.

// tex1, tex2, tex3: written by Pass 1 and read by Pass 2.
//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex1;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex2;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex3;

// tex4, tex5, tex6, tex7: listed as the outputs of Pass 2.
//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex4;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex5;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex6;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex7;

// tex8..tex11: not used by Pass 1 or listed in the Pass 2 directives;
// presumably produced and consumed by later passes (TODO confirm).
//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex8;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex9;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex10;

//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
//!FORMAT R16G16B16A16_FLOAT
Texture2D tex11;

// conv1ups and conv1ups1: intermediate textures at three times the input
// width and height, four 16-bit float channels per texel. Their producing and
// consuming passes are not visible in this part of the file.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
//!FORMAT R16G16B16A16_FLOAT
Texture2D conv1ups;

//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
//!FORMAT R16G16B16A16_FLOAT
Texture2D conv1ups1;


//!PASS 1
//!DESC Conv-4x3x3x3
//!IN INPUT
//!OUT tex1, tex2, tex3
//!BLOCK_SIZE 16
//!NUM_THREADS 64

// Pass 1: 3x3 convolution over the RGB channels of INPUT.
//
// Each thread owns a 2x2 quad of destination pixels whose top-left corner is
// derived from blockStart and threadId.x. It first loads a 4x4 window of RGB
// values with four gathers per colour channel, then, for every pixel of the
// quad, accumulates nine (3-vector x 3x4-matrix) products plus a bias into
// three 4-component results that are stored to tex1, tex2 and tex3.
//
// blockStart: position of the first pixel handled by this thread group.
// threadId:   thread index inside the group; only .x is used.
void Pass1(uint2 blockStart, uint3 threadId) {
	const uint2 inputSize = GetInputSize();
	const uint2 quadOrigin = blockStart + (Rmp8x8(threadId.x) << 1);

	// Nothing to do when the quad's first pixel already lies outside the image.
	if (!(quadOrigin.x < inputSize.x && quadOrigin.y < inputSize.y)) {
		return;
	}

	const float2 texel = GetInputPt();

	// Loop counters are declared once up front and shared by both loop nests.
	uint ix, iy;

	// 4x4 window of RGB values. The first index advances with x and the
	// second with y, mirroring the uint2(ix, iy) offset used for the gathers.
	min16float3 taps[4][4];
	[unroll]
	for (ix = 0; ix <= 2; ix += 2) {
		[unroll]
		for (iy = 0; iy <= 2; iy += 2) {
			const float2 gatherUv = (quadOrigin + uint2(ix, iy)) * texel;
			const min16float4 reds = INPUT.GatherRed(sam, gatherUv);
			const min16float4 greens = INPUT.GatherGreen(sam, gatherUv);
			const min16float4 blues = INPUT.GatherBlue(sam, gatherUv);

			// Component layout of a gather result:
			// w z
			// x y
			taps[ix][iy] = min16float3(reds.w, greens.w, blues.w);
			taps[ix][iy + 1] = min16float3(reds.x, greens.x, blues.x);
			taps[ix + 1][iy] = min16float3(reds.z, greens.z, blues.z);
			taps[ix + 1][iy + 1] = min16float3(reds.y, greens.y, blues.y);
		}
	}

	// Indices 1..2 address the four pixels of the quad inside the 4x4 window,
	// leaving a one-texel border on every side for the 3x3 neighbourhood.
	[unroll]
	for (ix = 1; ix <= 2; ++ix) {
		[unroll]
		for (iy = 1; iy <= 2; ++iy) {
			const uint2 outPos = quadOrigin + uint2(ix - 1, iy - 1);

			// The quad's first pixel was validated on entry; the other three
			// are skipped individually when they fall outside the image.
			const bool isQuadOrigin = (ix == 1 && iy == 1);
			if (!isQuadOrigin && (outPos.x >= inputSize.x || outPos.y >= inputSize.y)) {
				continue;
			}

			// First group of four output channels -> tex1.
			min16float4 acc = mul(taps[ix - 1][iy - 1], min16float3x4(-0.26345107, 0.18636681, 0.068192646, 0.06335259, -0.5026903, -0.39884016, -0.14562744, -0.24653248, -0.44533378, 0.52169526, -0.35453957, 0.25303423));
			acc += mul(taps[ix - 1][iy], min16float3x4(-0.22396083, 0.1324318, 0.47152156, -0.3061965, -0.06026671, -0.26795772, 0.0081171375, -0.32897332, -0.16401465, -0.1018444, 0.48241594, -0.09054633));
			acc += mul(taps[ix - 1][iy + 1], min16float3x4(0.25090155, -0.15917313, 0.028407926, -0.24014995, 0.4114972, -0.45535553, 0.08742311, 0.16796699, 0.0995656, -0.4009339, 0.21471445, 0.2708967));
			acc += mul(taps[ix][iy - 1], min16float3x4(-0.16269766, 0.16389379, -0.12857921, -0.1602467, -0.16460834, 0.15754342, 0.46217716, 0.20442651, 0.0548621, -0.018400457, 0.38643107, -0.29171357));
			acc += mul(taps[ix][iy], min16float3x4(-0.24035631, -0.33344224, -0.3904698, -0.4168555, -0.42237657, 0.36649242, 0.41396108, -0.38945103, -0.5806718, 0.035621256, 0.09171773, -0.54301006));
			acc += mul(taps[ix][iy + 1], min16float3x4(0.15957133, -0.035278857, 0.1318051, 0.6896821, 0.18556473, 0.16378926, 0.32670698, 0.2675555, 0.08802092, 0.41140598, 0.05322177, 0.5030955));
			acc += mul(taps[ix + 1][iy - 1], min16float3x4(-0.082798496, 0.24381381, -0.30908522, 0.04553323, 0.25664318, 0.4123797, -0.29377607, 0.15920162, 0.13717672, 0.027625162, 0.25476956, 0.21843456));
			acc += mul(taps[ix + 1][iy], min16float3x4(0.14534818, -0.239681, 0.22961527, 0.3814783, 0.1233398, 0.2449555, 0.015051085, 0.1661234, -0.27740797, -0.29109767, -0.19438179, -0.027439274));
			acc += mul(taps[ix + 1][iy + 1], min16float3x4(0.0011904882, -0.01287622, -0.1573707, -0.13167281, -0.12803882, -0.079415865, -0.04034391, -0.09625339, 0.23190106, -0.26743674, -0.48981485, -0.2063946));
			acc += min16float4(0.034235504, 0.039522275, -0.032817896, -0.0031068379);
			tex1[outPos] = acc;

			// Second group of four output channels -> tex2.
			acc = mul(taps[ix - 1][iy - 1], min16float3x4(-0.17155029, -0.084075995, 0.2281505, 0.38326037, 0.18672232, -0.2562305, 0.30811027, 0.30188802, -0.24588907, 0.088734694, 0.14092724, -0.18793459));
			acc += mul(taps[ix - 1][iy], min16float3x4(-0.47514066, 0.51882815, 0.1561294, -0.043147214, -0.19554369, 0.19514531, -0.14636773, 0.11425865, -0.2772368, 0.5388449, 0.54875004, -0.4526634));
			acc += mul(taps[ix - 1][iy + 1], min16float3x4(0.11270131, 0.44642356, -0.066219814, 0.15781905, 0.056682296, 0.026522577, 0.05600635, -0.13799536, 0.15637676, -0.15661198, 0.53794587, 0.09693692));
			acc += mul(taps[ix][iy - 1], min16float3x4(-0.23679815, 0.16397353, 0.37343305, 0.07477207, -0.36061585, 0.24027273, 0.3222875, 0.05577238, -0.17547923, 0.11737104, 0.10193468, -0.056727592));
			acc += mul(taps[ix][iy], min16float3x4(0.2335428, -0.5571976, 0.13586389, -0.3443148, 0.4537042, -0.59349614, -0.24114902, 0.08669349, 0.2881981, -0.29106617, -0.47775048, 0.22723311));
			acc += mul(taps[ix][iy + 1], min16float3x4(0.006350133, -0.28196353, 0.22710627, 0.30080464, -0.3500525, 0.09254133, -0.48047104, -0.30452347, -0.077637784, -0.11856046, 0.07377078, 0.44280833));
			acc += mul(taps[ix + 1][iy - 1], min16float3x4(0.2200762, 0.3665277, 0.043291833, 0.21484855, 0.15553318, -0.035003938, 0.14891839, -0.29007155, 0.23154758, -0.2348225, 0.48130423, 0.00733271));
			acc += mul(taps[ix + 1][iy], min16float3x4(0.28228128, 0.054867495, 0.08010268, -0.2980908, 0.15146615, -0.058449056, -0.43990552, -0.5963296, 0.09321943, 0.20146254, -0.08043876, 0.017381484));
			acc += mul(taps[ix + 1][iy + 1], min16float3x4(0.076894, 0.16354772, 0.25471574, 0.24382424, -0.15274979, -0.19706573, -0.30667382, 0.523845, 0.023073493, 0.34462887, -0.3384359, 0.18867111));
			acc += min16float4(0.014904483, -0.009271063, 0.04884906, 0.0106121525);
			tex2[outPos] = acc;

			// Third group of four output channels -> tex3.
			acc = mul(taps[ix - 1][iy - 1], min16float3x4(-0.34360278, -0.28731042, -0.017787619, 0.36802426, 0.33655256, -0.24784079, 0.29148427, 0.28857, -0.3111454, 0.0030706236, -0.25914, 0.5528963));
			acc += mul(taps[ix - 1][iy], min16float3x4(0.12459981, -0.17094392, -0.18776429, 0.37819883, 0.1320519, 0.21927781, -0.16188109, 0.050895408, -0.06871313, 0.16754176, 0.29934305, 0.052247107));
			acc += mul(taps[ix - 1][iy + 1], min16float3x4(-0.016753385, -0.0935026, -0.3025131, 0.029084548, -0.17713268, 0.23525053, 0.015773006, 0.5464473, 0.49457568, 0.03073306, 0.18685353, 0.28700578));
			acc += mul(taps[ix][iy - 1], min16float3x4(0.135332, 0.07585244, 0.05262212, -0.15484884, -0.13468477, 0.5161883, 0.10347934, -0.37127933, 0.12426171, 0.48973167, 0.19040361, -0.24403319));
			acc += mul(taps[ix][iy], min16float3x4(-0.54557467, 0.07250278, 0.37912187, 0.0044768555, -0.47080016, -0.4050018, 0.64416456, -0.58235925, -0.28048036, -0.32962233, -0.28131053, 0.022653949));
			acc += mul(taps[ix][iy + 1], min16float3x4(0.17059836, 0.016603703, 0.34638256, 0.028987328, 0.43271738, -0.15030707, 0.072848, 0.1422675, -0.23391044, -0.12179815, 0.37569857, -0.056668952));
			acc += mul(taps[ix + 1][iy - 1], min16float3x4(-0.0428437, 0.15237094, -0.26750615, 0.053740855, -0.04772152, -0.13561963, -0.20043467, -0.018060924, 0.29031327, -0.17592178, -0.5016104, -0.36639994));
			acc += mul(taps[ix + 1][iy], min16float3x4(0.39091983, -0.257284, -0.39293087, -0.1182859, -0.46328986, -0.1585645, -0.32158652, 0.41519204, 0.21179573, -0.3613411, -0.032484483, -0.03755994));
			acc += mul(taps[ix + 1][iy + 1], min16float3x4(0.42772895, 0.11436431, -0.115817815, -0.29173127, 0.57807744, -0.21997264, -0.49362126, 0.021626333, 0.1258072, -0.062251803, -0.16541855, 0.061321106));
			acc += min16float4(-0.017981518, -0.012223751, -0.0033700857, 0.013441364);
			tex3[outPos] = acc;
		}
	}
}


//!PASS 2
//!DESC Conv-4x3x3x24, Conv-4x1x1x40
//!IN tex1, tex2, tex3
//!OUT tex4, tex5, tex6, tex7
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass2(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e1 = tex1.SampleLevel(sam, pos, 0);
	min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na1 = max(-a1, 0);
	min16float4 nb1 = max(-b1, 0);
	min16float4 nc1 = max(-c1, 0);
	min16float4 nd1 = max(-d1, 0);
	min16float4 ne1 = max(-e1, 0);
	min16float4 nf1 = max(-f1, 0);
	min16float4 ng1 = max(-g1, 0);
	min16float4 nh1 = max(-h1, 0);
	min16float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);
	
	min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e2 = tex2.SampleLevel(sam, pos, 0);
	min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na2 = max(-a2, 0);
	min16float4 nb2 = max(-b2, 0);
	min16float4 nc2 = max(-c2, 0);
	min16float4 nd2 = max(-d2, 0);
	min16float4 ne2 = max(-e2, 0);
	min16float4 nf2 = max(-f2, 0);
	min16float4 ng2 = max(-g2, 0);
	min16float4 nh2 = max(-h2, 0);
	min16float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);
	
	min16float4 a3 = tex3.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e3 = tex3.SampleLevel(sam, pos, 0);
	min16float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i3 = tex3.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na3 = max(-a3, 0);
	min16float4 nb3 = max(-b3, 0);
	min16float4 nc3 = max(-c3, 0);
	min16float4 nd3 = max(-d3, 0);
	min16float4 ne3 = max(-e3, 0);
	min16float4 nf3 = max(-f3, 0);
	min16float4 ng3 = max(-g3, 0);
	min16float4 nh3 = max(-h3, 0);
	min16float4 ni3 = max(-i3, 0);

	a3 = max(a3, 0);
	b3 = max(b3, 0);
	c3 = max(c3, 0);
	d3 = max(d3, 0);
	e3 = max(e3, 0);
	f3 = max(f3, 0);
	g3 = max(g3, 0);
	h3 = max(h3, 0);
	i3 = max(i3, 0);

	min16float4 conv2d_2_tf = mul(a1, min16float4x4(0.1881249, -0.14544061, -0.022969106, 0.088232316, 0.0058642747, -0.049336948, -0.039974928, 0.06410949, -0.09211665, -0.034005307, -0.1095955, 0.10930763, -0.26397142, 0.1384094, 0.017342392, -0.21376696));
	conv2d_2_tf += mul(b1, min16float4x4(-0.14526816, -0.0639951, 0.11742242, -0.006982521, -0.05208895, 0.089485295, -0.19564004, -0.08121572, -0.11621622, 0.15375662, -0.15378582, 0.0596373, 0.14132364, -0.06385903, 0.05449634, -0.047440365));
	conv2d_2_tf += mul(c1, min16float4x4(-0.082622305, -0.23856479, 0.06367865, -0.019509695, 0.094882965, -0.04511791, 0.16706854, 0.20536391, 0.1409632, -0.10635743, 0.038904104, -0.0039008786, 0.16198882, -0.17409256, 0.13213669, 0.08329318));
	conv2d_2_tf += mul(d1, min16float4x4(-0.1186756, 0.2009846, -0.086417995, 0.20491274, -0.13763973, -0.0800847, 0.16069777, 0.10931271, 0.14152408, 0.047218926, 0.041168302, -0.054257084, -0.08315953, -0.1573787, 0.20787828, 0.118524544));
	conv2d_2_tf += mul(e1, min16float4x4(-0.19046788, 0.10213364, -0.112078644, -0.16287695, 0.011410189, -0.016858546, -0.09383451, -0.063516155, -0.17561492, -0.15199865, -0.112707786, -0.18099716, 0.19017689, -0.20048961, -0.5382596, -0.24332014));
	conv2d_2_tf += mul(f1, min16float4x4(-0.1465597, 0.044423096, 0.04632811, -0.015121401, -0.051081203, -0.09574356, -0.10018257, -0.019390205, -0.1562855, 0.041693382, -0.012624074, 0.057703923, 0.09825134, 0.1544577, 0.1683734, 0.018580355));
	conv2d_2_tf += mul(g1, min16float4x4(-0.22240564, -0.051839057, 0.12950379, 0.0048653902, 0.0144696245, -0.10698864, -0.08654499, -0.131132, 0.15429983, 0.025204081, -0.09136411, -0.053068906, -0.005858075, -0.02560129, 0.0469077, 0.018962694));
	conv2d_2_tf += mul(h1, min16float4x4(-0.4698737, -0.053354982, 0.27541625, -0.020424731, 0.06935965, 0.008045162, -0.11538889, -0.038876567, -0.049084928, 0.1629101, -0.012742019, 0.12038333, -0.0705842, 0.12735052, 0.17640172, 0.050716672));
	conv2d_2_tf += mul(i1, min16float4x4(-0.10064598, 0.016594354, -0.14633141, -0.09175336, -0.12889755, -0.1671076, 0.22031903, 0.0759859, 0.102435045, -0.072596334, -0.17714, 0.03531571, -0.022843607, 0.047148425, 0.105391234, 0.05260699));
	conv2d_2_tf += mul(a2, min16float4x4(-0.1057386, 0.020954097, -0.022159133, 0.064248964, -0.031145383, -0.008180922, -0.023611609, 0.05197287, -0.017418958, 0.02461813, 0.0584847, -0.10087345, -0.16315617, 0.15651843, 0.10478647, 0.08347392));
	conv2d_2_tf += mul(b2, min16float4x4(-0.01323452, 0.044956483, -0.007983463, 0.10643116, -0.030048033, -0.11766427, -0.101889476, 0.015120098, 0.031690307, 0.014470776, -0.10197176, -0.10045749, -0.065616645, -0.15230782, -0.26183948, -0.071056716));
	conv2d_2_tf += mul(c2, min16float4x4(0.026220636, -0.044518135, 0.17167594, 0.3016424, 0.12054841, 0.042369425, 0.04208856, 0.14938886, -0.07018442, -0.008244587, 0.14260693, -0.094418734, -0.032693435, 0.042990524, -0.053002246, -0.003936231));
	conv2d_2_tf += mul(d2, min16float4x4(-0.10141095, 0.17178011, -0.10951717, -0.28119737, 0.008288983, 0.14197187, 0.10903869, 0.017220182, 0.041842293, -0.03106527, -0.05892881, 0.02668739, 0.072505936, -0.060759444, 0.00032896115, -0.03440771));
	conv2d_2_tf += mul(e2, min16float4x4(0.13831837, -0.13864368, 0.15232176, 0.31198958, 0.033965178, 0.053397447, -0.30352455, -0.17199865, -0.06429645, 0.013913047, 0.10764071, -0.12238359, -0.04544379, 0.17074125, 0.024108075, -0.14521888));
	conv2d_2_tf += mul(f2, min16float4x4(-0.11011318, -0.0102100335, -0.37701187, -0.36465186, 0.16052358, -0.06683314, 0.16916892, 0.23348652, -0.17332914, 0.007918098, -0.055450343, 0.12134491, 0.002598775, 0.050541576, 0.16586582, -0.08609246));
	conv2d_2_tf += mul(g2, min16float4x4(0.055008903, -0.038048673, 0.12065314, -0.034293417, 0.026340824, 0.0635937, 0.0072025824, 0.1099919, -0.022833373, 0.06988719, 0.098066956, 0.09838032, 0.027212605, -0.10769643, 0.025036965, 0.07822364));
	conv2d_2_tf += mul(h2, min16float4x4(0.12550583, -0.015590264, -0.20009072, -0.2595937, -0.040573828, -0.07032441, 0.13428123, 0.0024277875, -0.103335135, -0.08461066, 0.05634581, -0.113169014, 0.05591198, -0.16420694, -0.06915715, -0.19497992));
	conv2d_2_tf += mul(i2, min16float4x4(0.088232726, -0.05118527, 0.119473234, 0.23262945, 0.06746001, 0.14686997, -0.25685823, 0.08349066, 0.120035954, 0.11132579, 0.1024914, 0.008478224, -0.054700002, -0.029634893, 0.050064556, -0.08939752));
	conv2d_2_tf += mul(a3, min16float4x4(0.022236016, -0.14602192, -0.011037687, 0.09127931, 0.14263593, 0.2303995, -0.07378528, 0.07426219, -0.25500375, 0.18845809, -0.065374866, 0.016772734, 0.02813176, 0.15021992, -0.032982655, 0.0046127643));
	conv2d_2_tf += mul(b3, min16float4x4(0.110158965, 0.02073459, 0.1380525, 0.017634321, -0.3546499, -0.099760525, -0.1195462, 0.057210118, -0.53130746, 0.23352407, -0.18252264, -0.19651698, -0.10013627, -0.006907238, -0.022171183, 0.023419948));
	conv2d_2_tf += mul(c3, min16float4x4(-0.009217382, 0.00943576, 0.005295363, 0.010542551, -0.21079898, -0.14469005, -0.19105618, 0.2098414, 0.18261504, 0.19765937, 0.044775106, -0.25939676, 0.072466746, -0.08828442, 0.066161856, 0.05692894));
	conv2d_2_tf += mul(d3, min16float4x4(-0.051633067, 0.019243274, 0.28932014, -0.029704608, -0.06255436, -0.4573925, -0.10963281, 0.121834375, 0.10874706, -0.093909726, 0.06983889, 0.048236616, -0.15379356, -0.06354611, -0.10668147, -0.02901699));
	conv2d_2_tf += mul(e3, min16float4x4(-0.044167574, 0.022249546, -0.3618917, -0.054136246, -0.105739385, -0.22325896, -0.070169605, -0.19650152, 0.07689512, -0.17047665, -0.07742679, 0.031095566, -0.01903123, -0.033752028, -0.2286711, 0.044381924));
	conv2d_2_tf += mul(f3, min16float4x4(-0.05709193, 0.15251294, -0.16776492, 0.09025173, 0.18235344, 0.3685535, -0.053927444, 0.10351524, -0.0938133, -0.26824594, -0.036424845, -0.106756285, -0.13051414, -0.07613318, -0.10721611, -0.13445549));
	conv2d_2_tf += mul(g3, min16float4x4(-0.0268394, 0.017245602, 0.1185864, 0.031915247, -0.037321728, 0.037805032, 0.13701047, 0.025731707, 0.03791209, -0.16549957, 0.08953334, -0.13901101, -0.1287722, 0.072961085, 0.06859001, 0.18934746));
	conv2d_2_tf += mul(h3, min16float4x4(-0.11152981, 0.13712928, -0.05714947, 0.05542204, -0.32208005, -0.015176284, 0.10014709, -0.030125491, -0.04422843, 0.12897238, 0.108573034, -0.025267191, 0.02247499, -0.058167085, -0.15205052, 0.043249656));
	conv2d_2_tf += mul(i3, min16float4x4(-0.12951276, -0.14417744, 0.012708804, -0.0040302873, 0.09192804, -0.092346616, -0.09659876, -0.13512622, -0.0737095, 0.002481852, 0.048459593, 0.05455724, -0.14035852, 0.07777282, 0.07471883, 0.107781895));
	conv2d_2_tf += mul(na1, min16float4x4(0.028793033, -0.039604917, -0.0045903274, -0.05023892, 0.04976248, -0.026074547, 0.1733191, -0.06694405, -0.12434122, 0.12477937, -0.105804294, 0.06170465, 0.33725888, -0.15944988, 0.09790923, 0.030690596));
	conv2d_2_tf += mul(nb1, min16float4x4(0.005191585, 0.08373177, -0.018288689, 0.020527333, -0.055718876, -0.12754384, 0.17755422, 0.1597085, 0.17601304, -0.0258804, 0.16454586, 0.106551126, -0.20891763, -0.05360957, -0.24229631, -0.15886526));
	conv2d_2_tf += mul(nc1, min16float4x4(0.03740399, -0.0043318006, -0.010840595, -0.01674406, -0.17876416, 0.09188681, -0.12203759, -0.09808559, 0.1243873, -0.184597, 0.07484877, 0.14448164, -0.15161137, 0.033237204, -0.054772068, -0.085399576));
	conv2d_2_tf += mul(nd1, min16float4x4(0.071139924, 0.025827989, 0.021663137, -0.12484576, -0.07799051, 0.20053016, 0.014714873, -0.041652568, 0.046397317, -0.07650734, 0.06753141, 0.080667324, 0.4557549, -0.029605106, -0.25674006, -0.27842438));
	conv2d_2_tf += mul(ne1, min16float4x4(0.16805562, -0.03722638, 0.021958483, -0.04969856, -0.15340807, -0.22158863, -0.25280216, -0.024268134, 0.085401855, 0.22427009, -0.04698029, -0.071075134, -0.10739174, 0.030285811, 0.31068414, 0.2882289));
	conv2d_2_tf += mul(nf1, min16float4x4(-0.010069354, -0.045132317, -0.08054911, 0.19212297, -0.11246117, 0.203382, 0.10145021, 0.1476792, -0.022835081, 0.16916804, -0.018178321, 0.076025024, -0.29570428, -0.007177177, -0.1047155, -0.0178633));
	conv2d_2_tf += mul(ng1, min16float4x4(0.076137505, 0.117270656, -0.077183075, -0.052782975, -0.08236995, 0.053947527, 0.13501388, 0.17139077, -0.2424162, -0.15007298, 0.123724684, 0.09327283, 0.19777925, 0.07314544, -0.18668725, -0.010371631));
	conv2d_2_tf += mul(nh1, min16float4x4(0.15866037, 0.053233996, -0.026709981, -0.1574147, -0.012303242, 0.06893102, 0.031804018, 0.10116885, -0.016902728, -0.082480945, 0.05133729, -0.20160739, -0.012635841, 0.032104325, 0.00968726, -0.018941477));
	conv2d_2_tf += mul(ni1, min16float4x4(-0.02683365, 0.14024723, 0.0020279875, 0.035137076, -0.019948762, 0.3120297, -0.018649966, -0.17814124, -0.14863688, -0.12977526, -0.09194036, 0.19637106, 0.12040974, 0.09383599, 0.10559805, -0.0319509));
	conv2d_2_tf += mul(na2, min16float4x4(-0.07015076, -0.07818044, 0.12413185, -0.0018199648, -0.015275738, -0.21548629, 0.046161238, -0.10475311, 0.082367115, 0.0053079966, 0.09559984, 0.039583992, -0.1681236, -0.23862287, -0.09229484, -0.12317666));
	conv2d_2_tf += mul(nb2, min16float4x4(-0.17587087, -0.097817905, 0.08857801, 0.14012139, -0.20023742, 0.029083535, 0.056073546, -0.06810832, 0.08625035, 0.023427716, 0.1797412, 0.048568305, -0.09278378, -0.09250215, -0.12440772, 0.2587798));
	conv2d_2_tf += mul(nc2, min16float4x4(-0.24181388, -0.016290328, -0.026988767, -0.005399553, -0.061761368, -0.0013004051, -0.1990831, -0.07799404, 0.03282008, 0.079514205, -0.07474829, -0.36701006, 0.078521594, -0.156468, 0.09041213, 0.1292482));
	conv2d_2_tf += mul(nd2, min16float4x4(-0.21960634, 0.041841425, 0.122728646, 0.06800145, 0.07355482, 0.26123464, -0.13518283, -0.05085496, -0.099832244, 0.04960356, 0.066544525, 0.09741243, -0.10965899, -0.16163626, 0.09816793, -0.014595947));
	conv2d_2_tf += mul(ne2, min16float4x4(0.07614604, -0.062298786, -0.07941662, -0.22525579, -0.29955792, 0.11145522, 0.123146005, 0.13863817, 0.15309983, 0.025902487, -0.08610474, -0.07598799, -0.26134565, -0.2818921, 0.0046356185, 0.007307074));
	conv2d_2_tf += mul(nf2, min16float4x4(-0.15936229, -0.10145381, 0.058567517, 0.21258314, -0.18010478, -0.22477242, -0.039975245, -0.34447697, -0.21647838, 0.31467855, -0.0674453, -0.5146147, 0.05382176, -0.026282668, -0.24090777, 0.10222359));
	conv2d_2_tf += mul(ng2, min16float4x4(-0.1045028, -0.027515164, 0.013251722, 0.108239084, 0.03163253, -0.030052185, 0.10836872, 0.15349132, 0.09593661, 0.0062710177, -0.19837233, -0.098303355, -0.23947543, -0.04082913, 0.16908304, -0.031784274));
	conv2d_2_tf += mul(nh2, min16float4x4(-0.07773699, 0.30408737, 0.10054892, 0.36721498, 0.51369953, -0.11931886, -0.17019019, -0.3288588, 0.11095048, -0.29225063, -0.075574756, -0.18392691, -0.10289336, 0.06882282, 0.20403436, 0.12073833));
	conv2d_2_tf += mul(ni2, min16float4x4(0.024539007, 0.053005982, -0.099204265, -0.084534295, -0.2587164, -0.31929657, 0.07193254, 0.18271501, -0.043669797, 0.062497724, -0.055462, 0.057130013, -0.015285072, -0.030743862, -0.07051513, -0.13783172));
	conv2d_2_tf += mul(na3, min16float4x4(-0.4343681, 0.35928357, -0.004770178, -0.079942055, 0.014088603, -0.20866469, -0.1378781, -0.06831558, 0.21436058, -0.08427488, 0.2455502, -0.065596916, -0.06559933, -0.027101375, 0.023555819, -0.20939256));
	conv2d_2_tf += mul(nb3, min16float4x4(-0.37720296, -0.111260146, -0.25392932, -0.33377793, -0.17806955, -0.008747484, 0.17404033, 0.058826912, 0.0039355545, -0.18436235, 0.15803719, 0.15143508, 0.11155828, 0.09333553, -0.17960371, -0.036842924));
	conv2d_2_tf += mul(nc3, min16float4x4(-0.087490946, 0.0959697, -0.08301798, -0.19364063, -0.00996324, 0.014655412, 0.021732382, 0.07269497, 0.012744119, 0.01542146, 0.109438084, 0.18674947, -0.05728511, 0.017406877, 0.036412247, -0.044986803));
	conv2d_2_tf += mul(nd3, min16float4x4(0.30902067, 0.25019556, -0.079495244, -0.26099077, 0.08450634, -0.08346094, 0.004498276, -0.119334444, -0.08587327, -0.019446453, -0.1811446, -0.16136086, 0.006683898, 0.0005228834, -0.11937812, -0.2045503));
	conv2d_2_tf += mul(ne3, min16float4x4(0.19326456, -0.052496854, 0.12926556, 0.10167019, 0.090374604, 0.07595169, -0.0048561483, 0.12414255, 0.19320521, -0.027459998, 0.08993327, -0.035830285, 0.006461366, 0.023297347, 0.0691706, -0.00831113));
	conv2d_2_tf += mul(nf3, min16float4x4(0.13971736, 0.0788502, 0.12267767, 0.004433991, -0.053574555, -0.08087108, -0.26019198, -0.04175351, -0.13934188, 0.04144695, -0.070562504, -0.068388134, -0.1347503, -0.02173245, -0.1099242, -0.020897312));
	conv2d_2_tf += mul(ng3, min16float4x4(0.07843604, 0.04441641, -0.016214373, -0.15351163, -0.021339556, 0.023823377, -0.01442564, -0.09113205, -0.02552644, 0.14885889, -0.16178642, 0.14472331, 0.14082494, 0.05760455, -0.11503234, -0.16907685));
	conv2d_2_tf += mul(nh3, min16float4x4(-0.042953692, -0.3268466, 0.13181087, -0.06399399, 0.17543526, 0.111214496, 0.07369484, -0.003378238, 0.040965978, -0.0073295045, 0.07711077, -0.033094298, -0.08758825, -0.01715938, 0.056862406, -0.010732023));
	conv2d_2_tf += mul(ni3, min16float4x4(-0.039256442, -0.07153648, 0.10314899, -0.1192048, -0.033410206, 0.13077301, 0.19343375, -0.07479033, 0.10759806, -0.037313893, 0.06156247, 0.021744521, -0.18148352, -0.15683053, 0.017884498, -0.11338723));
	conv2d_2_tf += min16float4(-0.077597156, 0.024995416, 0.0048880246, -0.06210122);
	min16float4 nconv2d_2_tf = max(-conv2d_2_tf, 0);
	conv2d_2_tf = max(conv2d_2_tf, 0);
	
	min16float4 conv2d_1_tf = mul(a1, min16float4x4(0.10368956, 0.09174666, 0.07265347, 0.009965846, 0.04307676, 0.018726716, 0.064217605, -0.024381645, 0.013237381, 0.039251406, 0.13164084, -0.05265028, -0.08619517, 0.015469731, 0.10171868, -0.11194108));
	conv2d_1_tf += mul(b1, min16float4x4(-0.055484463, 0.1386706, -0.22939423, -0.2222723, 0.04815343, 0.05425625, 0.08234074, 0.12962975, 0.030559294, -0.07823733, 0.12347866, -0.13917705, -0.031347297, 0.010592373, -0.38942683, -0.302033));
	conv2d_1_tf += mul(c1, min16float4x4(0.06968848, -0.03574659, 0.11817242, 0.044270225, 0.0481696, 0.045347195, -0.14479072, 0.06971279, 0.012434736, 0.03927546, 0.13076504, 0.032268204, 0.040274065, 0.053418823, -0.05195065, 0.1341056));
	conv2d_1_tf += mul(d1, min16float4x4(0.1314648, 0.08953099, -0.058160458, -0.098807305, -0.08652445, -0.19136623, -0.012327089, 0.14297265, 0.11436408, 0.031837817, -0.0038611747, 0.08295747, 0.19534546, -0.033664998, -0.51042134, -0.21606028));
	conv2d_1_tf += mul(e1, min16float4x4(-0.332711, -0.2260786, 0.35732532, 0.026584813, 0.16421017, 0.21153966, -0.112725854, -0.048803244, 0.059562314, -0.010458478, 0.0063304375, -0.007279937, -0.41918445, 0.10137393, -0.0989079, -0.17768846));
	conv2d_1_tf += mul(f1, min16float4x4(-0.22947264, 0.008074958, -0.03876367, 0.28019628, -0.18640186, 0.072562195, -0.001338717, 0.17349707, 0.13131878, 0.05085823, -0.11547487, -0.084437385, -0.18131672, 0.026830718, 0.0960529, -0.014084568));
	conv2d_1_tf += mul(g1, min16float4x4(0.13153158, 0.079937235, -0.14291838, -0.062477887, -0.0690248, 0.15090927, 0.060723048, -0.044703092, 0.005483621, -0.113471694, 0.048640195, -0.024538955, -0.01751092, 0.19206041, -0.1859277, -0.22007878));
	conv2d_1_tf += mul(h1, min16float4x4(-0.04971548, -0.38541326, -0.080354154, -0.1132633, -0.13348146, 0.11406493, 0.05543971, 0.022810424, -0.09030199, -0.053045455, -0.084034644, 0.0014670533, 0.0007018557, -0.24078067, 0.047226585, 0.08619653));
	conv2d_1_tf += mul(i1, min16float4x4(-0.08993396, -0.09246378, 0.11467184, 0.060891952, -0.022887891, -0.008537377, 0.13542707, 0.08030356, -0.06174077, -0.07314582, -0.111782126, -0.08939319, -0.09756803, -0.15771574, 0.073002145, 0.035939205));
	conv2d_1_tf += mul(a2, min16float4x4(-0.09398606, -0.118093155, 0.024832802, 0.049131367, 0.06665196, -0.039545495, -0.107865654, -0.043897964, -0.03278348, -0.111089505, 0.12056342, -0.10977613, -0.05880801, -0.08684503, -0.15480064, -0.09669209));
	conv2d_1_tf += mul(b2, min16float4x4(-0.12028866, -0.0130571015, 0.010480521, 0.28919983, 0.050575808, -0.07968808, -0.15499628, -0.13613448, 0.030993043, 0.13226634, -0.12666325, -0.010337325, -0.025353834, 0.017561335, -0.08171704, -0.17280379));
	conv2d_1_tf += mul(c2, min16float4x4(-0.0008190666, 0.017923795, -0.13926646, -0.00083633314, -0.14120303, 0.109396234, 0.026602108, 0.2108425, 0.15093753, -0.0016773659, 0.028220268, 0.09914804, -0.045055833, 0.040082425, 0.007756443, -0.04522211));
	conv2d_1_tf += mul(d2, min16float4x4(0.059589684, 0.04780217, 0.30785602, 0.25626636, 0.08686253, 0.11348654, 0.042249523, -0.2264382, -0.058502045, 0.05044742, 0.0031711252, -0.021721566, -0.011926813, 0.042892855, -0.08586602, -0.029168598));
	conv2d_1_tf += mul(e2, min16float4x4(0.09367661, -0.019030625, -0.34638473, -0.10968469, -0.16300671, 0.21311292, 0.11657136, -0.044009518, 0.10225506, -0.044505168, 0.20920436, -0.018161744, -0.018144146, 0.026626088, -0.056913715, 0.15370414));
	conv2d_1_tf += mul(f2, min16float4x4(-0.28757727, 0.14743091, -0.021321807, -0.048045393, -0.109708, -0.14760888, 0.15246773, -0.028329216, 0.009206364, -0.06396112, 0.12593451, 0.052947026, 0.066429235, -0.08044728, 0.0070432564, -0.057647638));
	conv2d_1_tf += mul(g2, min16float4x4(-0.023919886, -0.20876022, 0.05590491, 0.12671952, -0.07277091, 0.024939056, 0.03633482, -0.10239475, -0.12012349, -0.17192347, 0.014865882, 0.1858935, -0.013352806, -0.04451544, 0.0032296637, 0.09310079));
	conv2d_1_tf += mul(h2, min16float4x4(-0.29340369, 0.1377685, -0.018134177, -0.0819466, 0.2541578, -0.1270915, -0.12300359, 0.114513785, 0.21511158, -0.060876742, 0.07682154, 0.09775888, -0.09133818, 0.04477866, 0.058042303, -0.027626123));
	conv2d_1_tf += mul(i2, min16float4x4(-0.098641984, -0.09568759, 0.27307647, 0.044102278, -0.03640084, -0.10440432, -0.011212675, -0.22568303, -0.008232321, 0.14870772, -0.17107275, -0.023316732, 0.03395947, 0.14223643, -0.08063479, 0.14301774));
	conv2d_1_tf += mul(a3, min16float4x4(-0.08714423, -0.12230681, -0.22175795, -0.10298021, 0.0009175108, 0.19820437, 0.04215484, 0.2772454, 0.046766162, 0.023245906, 0.36313313, -0.29657102, 0.0010776661, 0.047935788, 0.113361314, -0.05614472));
	conv2d_1_tf += mul(b3, min16float4x4(0.15069975, 0.06458973, 0.08984772, -0.08219822, -0.37328726, -0.03008995, 0.31162828, 0.07075847, -0.13914284, -0.10216768, 0.22251949, -0.30631062, 0.17172062, 0.058428258, -0.11345689, 0.08461611));
	conv2d_1_tf += mul(c3, min16float4x4(0.007734305, 0.042484675, -0.15685312, -0.048171967, 0.10970874, 0.061090663, -0.08464978, 0.08347133, -0.17933917, 0.2308347, -0.053314723, 0.09323812, -0.04228206, 0.055042125, -0.046495847, -0.032692812));
	conv2d_1_tf += mul(d3, min16float4x4(-0.09439761, 0.03567186, -0.17220385, -0.103939146, -0.064900115, -0.16004047, 0.004621011, -0.014501001, -0.14071538, -0.05238438, -0.04519603, 0.21972013, -0.007383857, -0.07692677, -0.14034486, 0.08030412));
	conv2d_1_tf += mul(e3, min16float4x4(-0.22748968, 0.12067121, -0.05225513, 0.04308743, -0.081648685, 0.28658885, 0.37694585, -0.018508147, -0.019247225, 0.095557846, 0.015747357, 0.12365868, -0.076417744, -0.03912286, 0.18391648, -0.09244896));
	conv2d_1_tf += mul(f3, min16float4x4(-0.00221828, -0.0894836, 0.038467363, -0.019945016, 0.13546647, 0.17713489, -0.17275713, 0.08575425, -0.019129591, 0.16340882, -0.16357088, -0.0033604207, -0.06446814, -0.15712759, 0.18558913, -0.115558594));
	conv2d_1_tf += mul(g3, min16float4x4(-0.09995351, 0.18885328, -0.057601925, 0.01172547, -0.031203317, -0.1181948, 0.006120215, 0.25098777, -0.06316651, 0.047607217, -0.056073133, -0.029685916, 0.12195799, -0.056664392, -0.054523658, 0.03753435));
	conv2d_1_tf += mul(h3, min16float4x4(0.007936505, -0.021070726, 0.040594626, 0.061293513, -0.074233375, 0.10112329, -0.19424592, -0.14433385, -0.04661142, -0.09192385, 0.034151867, -0.11941847, 0.046759605, -0.15323174, 0.09908571, 0.18290807));
	conv2d_1_tf += mul(i3, min16float4x4(-0.012291647, 0.114136524, 0.10576901, -0.012061901, 0.2356885, 0.048024837, 0.18102467, -0.034004245, -0.06746709, 0.09405117, 0.12362687, 0.0254422, 0.22654915, 0.04224264, -0.049588405, 0.11478716));
	conv2d_1_tf += mul(na1, min16float4x4(-0.021690933, 0.13663062, -0.161411, 0.06806553, -0.1773275, -0.0940566, -0.18002738, 0.047475196, 0.0072157113, -0.008688586, -0.15493456, 0.022294179, 0.041401867, -0.10311516, -0.006603416, 0.059536614));
	conv2d_1_tf += mul(nb1, min16float4x4(-0.13541889, 0.047185, -0.027699882, 0.060225613, -0.035152074, 0.05752177, -0.026204573, 0.11251955, -0.0049166707, 0.17533402, -0.15755837, 0.16124752, 0.04805776, -0.10309488, 0.15945134, 0.025226792));
	conv2d_1_tf += mul(nc1, min16float4x4(-0.015074193, -0.094979845, 0.027753184, -0.071142055, -0.17082961, -0.06833402, 0.13620014, -0.24564765, 0.036582932, 0.13075556, 0.036705326, 0.03863992, -0.018921472, -0.0016482361, 0.13597268, -0.038188133));
	conv2d_1_tf += mul(nd1, min16float4x4(-0.14212462, -0.1483275, 0.05649678, 0.05684924, -0.11407954, 0.13978885, 0.070467845, -0.07458527, -0.19702937, 0.23950967, -0.15242746, -0.26435548, -0.14437793, 0.21487178, 0.4991241, 0.18331984));
	conv2d_1_tf += mul(ne1, min16float4x4(0.20045248, 0.066468574, -0.015601024, 0.012849705, -0.14952832, -0.06828453, 0.16009094, -0.09515789, -0.1071139, -0.021629127, -0.012993768, -0.022518635, 0.19255438, -0.09875012, 0.07555782, 0.0780372));
	conv2d_1_tf += mul(nf1, min16float4x4(-0.028311213, -0.025465565, 0.020059558, -0.116105095, -0.042490575, 0.020179577, 0.010893176, -0.11184776, -0.1702318, -0.025035636, 0.008381181, 0.0586714, 0.03539251, -0.0448198, -0.056921933, -0.029987138));
	conv2d_1_tf += mul(ng1, min16float4x4(0.049813945, 0.08434948, 0.09337763, 0.06701621, -0.061224304, -0.24754077, -0.017353527, -0.042758185, 0.013161995, -0.22947139, 0.019135898, 0.11039477, 0.16954716, -0.25619635, 0.18368678, 0.03542052));
	conv2d_1_tf += mul(nh1, min16float4x4(-0.15430786, 0.07348774, 0.15545642, 0.20969617, 0.1067826, 0.15255202, 0.020220853, 0.09658389, -0.088782035, -0.19119574, 0.13885954, 0.15108526, -0.07552868, -0.11574438, -0.034102093, -0.031383175));
	conv2d_1_tf += mul(ni1, min16float4x4(0.061409608, -0.00082869077, -0.08336049, -0.01866603, 0.07322213, -0.1152386, -0.004205211, -0.18793713, 0.091782115, 0.05387527, 0.069104694, 0.25387684, -0.101916246, 0.065856785, -0.020407397, 0.035098225));
	conv2d_1_tf += mul(na2, min16float4x4(0.06225989, -0.039721318, 0.19908188, 0.08382035, -0.024357362, 0.014932128, -0.060558856, -0.049815435, -0.03166011, 0.0339055, -0.12810327, 0.008812703, 0.06120202, 0.085533425, 0.21571258, -0.20605975));
	conv2d_1_tf += mul(nb2, min16float4x4(-0.045329664, 0.02261115, -0.0335033, -0.058562186, -0.0099387, 0.0046313554, 0.21475597, 0.04558062, 0.17891279, 0.005057579, 0.22518916, 0.1998231, 0.09627137, -0.2318303, -0.08868813, -0.27863982));
	conv2d_1_tf += mul(nc2, min16float4x4(-0.15865076, 0.077262044, 0.036153752, 0.07885703, 0.13166751, -0.12820594, -0.05823962, -0.2583444, -0.2245552, -0.04434666, -0.13453422, -0.27865237, 0.014107271, 0.045582164, 0.0064884513, -0.019007552));
	conv2d_1_tf += mul(nd2, min16float4x4(0.0643133, 0.06440001, -0.14517003, -0.101694606, 0.058990445, 0.11955667, 0.45094532, 0.20261864, 0.07944409, -0.061399437, 0.022036074, 0.046660237, -0.17064287, -0.076766625, 0.25972953, 0.29821205));
	conv2d_1_tf += mul(ne2, min16float4x4(-0.11031386, -0.05850727, 0.055557184, 0.11549242, 0.12120408, -0.33330265, 0.095613986, 0.09242419, -0.011835885, -0.19384164, -0.01893125, 0.27290896, -0.18104021, 0.044360142, 0.06759539, -0.0027218745));
	conv2d_1_tf += mul(nf2, min16float4x4(0.19390257, -0.13378039, 0.07428329, 0.016053686, -0.18574655, 0.055462763, -0.2527128, -0.47279125, -0.17490762, 0.21626428, -0.1473371, -0.35594228, 0.054865763, -0.04086486, -0.061911695, 0.051812805));
	conv2d_1_tf += mul(ng2, min16float4x4(-0.029701848, 0.24927482, 0.00581731, -0.10748679, -0.07500632, 0.033424605, 0.14734372, -0.18966366, 0.031880617, 0.17622112, -0.031867832, -0.10119831, -0.15391265, -0.14308685, 0.093484215, 0.18867014));
	conv2d_1_tf += mul(nh2, min16float4x4(0.19035357, -0.19525306, -0.025621792, 0.09154427, -0.07798503, -0.22271548, 0.11034287, -0.04197031, -0.24772005, 0.43681505, -0.19703668, -0.2614237, 0.05807699, -0.2631317, -0.020604266, -0.048005704));
	conv2d_1_tf += mul(ni2, min16float4x4(-0.08587588, 0.13374045, -0.09263761, -0.13216262, -0.11242246, -0.12541875, -0.09835177, 0.1586739, -0.21013282, 0.087373346, 0.107112356, 0.47657737, 0.0459955, -0.07181196, 0.07818155, -0.10435423));
	conv2d_1_tf += mul(na3, min16float4x4(-0.091803394, -0.32280564, 0.28972253, 0.12908047, 0.06683764, -0.039376236, 0.024078066, 0.18940936, -0.055246543, 0.12222864, -0.0177199, 0.09346665, 0.07164098, 0.065791056, -0.08516637, -0.10187257));
	conv2d_1_tf += mul(nb3, min16float4x4(-0.12561126, -0.28730518, 0.190799, -0.17922764, 0.04376582, -0.08152354, -0.0690038, -0.10861494, -0.03100546, 0.10962334, -0.20492296, 0.12868984, 0.06536495, 0.08559974, 0.033028, -0.07235402));
	conv2d_1_tf += mul(nc3, min16float4x4(-0.012734173, -0.12211726, 0.057524282, 0.015053666, -0.052275516, 0.11774483, 0.08221696, -0.024205929, 0.122006595, 0.054565493, -0.049608365, 0.02801238, 0.07593017, 0.074450806, 0.097137615, -0.008985974));
	conv2d_1_tf += mul(nd3, min16float4x4(-0.32826158, -0.022971062, 0.37642807, 0.38614145, -0.06932448, 0.0641898, -0.09011684, -0.019884817, -0.004897904, 0.07661578, -0.050405186, -0.24849766, 0.04642452, 0.09120379, 0.26060387, -0.2533109));
	conv2d_1_tf += mul(ne3, min16float4x4(0.09669597, -0.045555357, -0.24132517, -0.28401875, 0.11226361, 0.08378312, -0.07415474, -0.036874313, -0.001286788, 0.14013582, 0.14750466, -0.048925027, 0.13374946, 0.10844033, 0.123459235, -0.10933974));
	conv2d_1_tf += mul(nf3, min16float4x4(-0.03275827, 0.27429518, -0.0983686, -0.010947437, -0.18409865, 0.12616666, -0.05766888, 0.07149005, -0.13777009, 0.022123039, 0.084938325, 0.015972659, 0.20145003, -0.09534558, -0.0082679195, -0.1515079));
	conv2d_1_tf += mul(ng3, min16float4x4(0.13148536, -0.3421452, 0.08851102, 0.012056574, -0.1525749, 0.09364548, -0.02235517, -0.1775178, 0.18052714, -0.14639667, 0.07453223, 0.03912742, -0.284782, 0.023833552, 0.09671063, -0.168578));
	conv2d_1_tf += mul(nh3, min16float4x4(-0.24303597, -0.05585747, -0.21645154, -0.084838174, -0.15413773, -0.15403214, -0.021544017, 0.15751824, -0.027032627, -0.18457665, -0.02174098, -0.0070916233, -0.1609649, -0.32226282, -0.18423033, -0.29629233));
	conv2d_1_tf += mul(ni3, min16float4x4(0.1602529, 0.026087781, 0.01551678, 0.07093837, -0.007075046, -0.0061597642, -0.0057887356, -0.08935906, 0.0028665168, -0.1038671, -0.093715765, -0.035213456, -0.041290607, -0.15825188, 0.11327359, -0.20286629));
	conv2d_1_tf += min16float4(-0.062293675, 0.09216847, 0.010529031, 0.03100192);
	tex4[gxy] = conv2d_1_tf;
	min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0);
	conv2d_1_tf = max(conv2d_1_tf, 0);

	min16float4 target = mul(e1, min16float4x4(-0.02357968, 0.13800439, 0.054744735, -0.32328397, -0.2263118, -0.3222542, -0.15286992, -0.3053175, -0.20046607, 0.025345843, 0.032755207, 0.40165102, 0.03166696, 0.29110438, 0.28861988, 0.05585125));
	target += mul(e2, min16float4x4(0.11055126, -0.33034575, 0.039494887, -0.17843343, 0.35742196, 0.00032650787, 0.21049741, 0.18823248, -0.1741954, 0.27586365, -0.043366615, 0.02092058, -0.082515135, -0.15504313, 0.13261497, 0.14650741));
	target += mul(e3, min16float4x4(0.39276633, -0.031067554, -0.08830738, -0.23975314, -0.20294978, 0.030291535, 0.4623106, 0.06494191, 0.042467684, -0.28105733, -0.053258326, -0.17269841, 0.09479501, 0.11930515, 0.1258843, 0.11058792));
	target += mul(ne1, min16float4x4(-0.18343425, -0.4381688, -0.08248827, -0.42846557, -0.08277779, 0.45192116, 0.21961756, 0.23076119, -0.2093829, -0.29050866, 0.26212537, -0.25469857, -0.4832557, -0.45126852, -0.35072148, -0.18368497));
	target += mul(ne2, min16float4x4(0.10529696, 0.5964488, 0.13258573, -0.07494986, -0.3341919, 0.19418421, -0.18307082, 0.34982273, -0.0430461, 0.21097268, 0.03212202, -0.015623122, 0.43791813, 0.16207397, 0.123477034, -0.087993294));
	target += mul(ne3, min16float4x4(-0.01878982, 0.007308694, 0.25769314, 0.18407181, 0.00095180905, -0.2600526, -0.31043288, -0.24622385, 0.07832029, 0.05502411, 0.37793204, -0.07329948, -0.28405467, -0.15038961, 0.19259417, 0.105486296));
	target += mul(conv2d_2_tf, min16float4x4(0.047820415, 0.3303589, 0.035807017, -0.41168606, -0.2118325, -0.045765184, -0.15234827, 0.28021428, -0.2084036, -0.40200952, -0.3261011, -0.13480914, -0.06876906, -0.19167677, -0.20444186, -0.44851676));
	target += mul(nconv2d_2_tf, min16float4x4(-0.24726203, -0.0097923195, -0.23193192, 0.31947026, 0.4274281, -0.36929542, 0.10095328, -0.19663717, 0.3244895, 0.49458218, 0.24745567, 0.15722558, 0.43052208, 0.377559, 0.22543637, 0.13009055));
	target += mul(conv2d_1_tf, min16float4x4(0.01817998, 0.111477636, -0.12727399, 0.27395004, 0.19770023, -0.1636959, 0.25407487, -0.24871433, -0.08552937, 0.3223687, 0.30668882, 0.40221208, -0.20192504, 0.14656074, 0.5100356, -0.0948956));
	target += mul(nconv2d_1_tf, min16float4x4(0.40383592, -0.043663148, 0.4813348, 0.10317451, -0.049076255, -0.022925228, 0.0872564, 0.21741754, 0.23656987, -0.22309794, -0.2260013, 0.20823886, -0.055542476, 0.016604664, -0.1964831, 0.11962174));
	target += min16float4(-0.049604952, -0.039514415, -0.06137416, -0.0015509313);
	tex5[gxy] = target;
	
	target = mul(e1, min16float4x4(0.029635962, 0.08045753, 0.03622311, 0.06677362, 0.14780864, -0.087087184, 0.22309896, -0.1772139, -0.08716722, 0.1075154, 0.044472143, 0.021324798, 0.10346262, -0.24718447, -0.2489118, 0.4517737));
	target += mul(e2, min16float4x4(0.20637918, -0.11695054, 0.27656725, 0.009858572, -0.62555677, 0.12796827, -0.057749186, -0.02636826, 0.11764726, -0.034879886, -0.062285252, -0.048256125, 0.37146622, -0.17392562, 0.24782267, 0.3184173));
	target += mul(e3, min16float4x4(0.2624149, 0.007052751, 0.1595428, 0.26269603, -0.33775207, -0.66331345, 0.18036188, -0.25012106, -0.15003558, 0.12337829, -0.3230818, 0.06187628, 0.096601635, 0.24300486, -0.13784438, 0.27110842));
	target += mul(ne1, min16float4x4(-0.180413, 0.039972585, 0.48966697, -0.4130023, -0.03654654, -0.27514896, -0.025462124, 0.06652415, 0.28900522, 0.035381883, 0.20655172, 0.0073647103, -0.5028713, -0.0061578755, -0.09185675, -0.52771837));
	target += mul(ne2, min16float4x4(-0.3205473, -0.23172325, -0.20749244, 0.058195353, 0.20280065, -0.106998004, 0.08968707, 0.10981961, -0.13291806, 0.0028465164, 0.11793527, 0.11942547, 0.100123264, -0.14852245, -0.032194547, -0.118260525));
	target += mul(ne3, min16float4x4(0.004620961, -0.13271236, 0.110130526, -0.075169735, 0.35998157, -0.046072174, 0.02044828, -0.1019322, -0.038753018, -0.12328749, -0.28227237, 0.18373057, -0.23704045, 0.20384738, 0.097455874, -0.23102747));
	target += mul(conv2d_2_tf, min16float4x4(0.30397, -0.007688397, -0.2519374, -0.14401323, -0.031671453, 0.10171321, -0.18295656, -0.029794114, 0.19171898, 0.23662621, 0.09319509, -0.3479054, 0.036986895, 0.13572362, 0.1142681, -0.17851138));
	target += mul(nconv2d_2_tf, min16float4x4(-0.19525734, 0.36855492, 0.05751295, -0.12524441, 0.06309533, 0.20228319, -0.07533531, 0.26733333, -0.21407285, -0.2900094, -0.28743416, 0.18039729, -0.27968687, -0.23786859, -0.21049118, -0.006130187));
	target += mul(conv2d_1_tf, min16float4x4(0.34406897, -0.14967814, 0.56049985, -0.18166065, -0.061995413, 0.117799215, 0.3054206, 0.4034068, -0.2116504, -0.6017806, 0.004660423, 0.051566444, 0.4380975, -0.3172436, -0.09930328, -0.16182126));
	target += mul(nconv2d_1_tf, min16float4x4(-0.09316841, 0.036305115, -0.30209473, 0.098138526, -0.012532953, -0.050068337, -0.22571203, -0.30636647, -0.124337815, 0.07323685, -0.15504828, 0.19263308, -0.017216058, 0.34484297, -0.1460544, -0.24951003));
	target += min16float4(0.10388342, 0.00828351, 0.14884935, 0.034392886);
	tex6[gxy] = target;
	
	target = mul(e1, min16float4x4(-0.15275823, 0.31693572, 0.03429309, -0.06982273, 0.08535909, 0.019838037, -0.03189405, 0.3190016, 0.16633914, 0.48730284, -0.27923077, 0.31791112, 0.43154097, 0.005003616, -0.26277873, -0.009333685));
	target += mul(e2, min16float4x4(0.23504019, -0.12419379, 0.07217815, -0.090434305, -0.0380588, -0.14686479, -0.33812302, -0.20242776, -0.20776805, 0.24741934, -0.16489775, 0.07052134, -0.08030772, 0.23784883, -0.28709608, -0.17689173));
	target += mul(e3, min16float4x4(-0.05109775, -0.40860242, -0.003464472, -0.19893257, 0.23186824, -0.12760048, -0.22718583, 0.02299852, 0.27083093, 0.073904194, -0.056870755, -0.35324985, -0.023004858, -0.29591596, -0.020298446, -0.05753052));
	target += mul(ne1, min16float4x4(0.0035456547, -0.37682405, 0.047876693, 0.1168026, 0.015805494, -0.04388269, 0.12970346, 0.2497829, -0.009891778, 0.116980106, 0.13058232, 0.22570355, 0.13866597, 0.036246244, 0.10916998, -0.040503114));
	target += mul(ne2, min16float4x4(-0.25300103, -0.065156855, 0.063345924, 0.11406543, -0.1902478, 0.16440767, 0.043949526, 0.43318078, -0.03932035, -0.08510957, 0.19621156, -0.045045726, -0.08339006, -0.04335483, 0.37129655, -0.22328225));
	target += mul(ne3, min16float4x4(0.16169593, 0.2758587, 0.38249364, 0.12606645, 0.4582731, 0.09374545, -0.10988087, -0.21678255, -0.004099455, -0.09436347, 0.33964127, 0.20880581, -0.06742301, -0.025149476, 0.12146305, 0.5012377));
	target += mul(conv2d_2_tf, min16float4x4(0.11523535, 0.31662583, -0.0709322, -0.066175185, 0.08868106, -0.042457394, 0.32469732, -0.1987238, 0.41399983, 0.015568244, 0.14037918, 0.2879998, -0.32157704, 0.22491854, -0.07769691, 0.2052648));
	target += mul(nconv2d_2_tf, min16float4x4(-0.299831, -0.247278, -0.2011737, -0.3759366, -0.14935663, -0.095033385, 0.06259881, -0.23891686, -0.4340098, 0.07340212, -0.0012697511, -0.16527005, 0.0814454, -0.43962866, -0.3040046, 0.06242604));
	target += mul(conv2d_1_tf, min16float4x4(0.11802704, 0.2323739, 0.13466287, -0.25053164, -0.08020803, 0.1628004, -0.030645542, -0.40872335, -0.24624921, 0.15931502, 0.40752286, -0.07906199, 0.4286516, -0.1651973, -0.07021073, 0.0867332));
	target += mul(nconv2d_1_tf, min16float4x4(-0.23617363, 0.053548977, -0.14130518, -0.37744048, -0.11805406, -0.13757266, -0.026939899, 0.028020354, 0.24626125, -0.06998214, -0.02793638, 0.10509643, 0.06577935, -0.17211749, -0.12747282, -0.16999653));
	target += min16float4(-0.022106458, -0.012578552, 0.016203664, 0.026009269);
	tex7[gxy] = target;
}


//!PASS 3
//!DESC Conv-4x3x3x24, Conv-4x1x1x48
//!IN tex5, tex6, tex7, tex4
//!OUT tex8, tex1, tex2, tex3
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass3(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	min16float4 a1 = tex5.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d1 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e1 = tex5.SampleLevel(sam, pos, 0);
	min16float4 f1 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i1 = tex5.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na1 = max(-a1, 0);
	min16float4 nb1 = max(-b1, 0);
	min16float4 nc1 = max(-c1, 0);
	min16float4 nd1 = max(-d1, 0);
	min16float4 ne1 = max(-e1, 0);
	min16float4 nf1 = max(-f1, 0);
	min16float4 ng1 = max(-g1, 0);
	min16float4 nh1 = max(-h1, 0);
	min16float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);
	
	min16float4 a2 = tex6.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d2 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e2 = tex6.SampleLevel(sam, pos, 0);
	min16float4 f2 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i2 = tex6.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na2 = max(-a2, 0);
	min16float4 nb2 = max(-b2, 0);
	min16float4 nc2 = max(-c2, 0);
	min16float4 nd2 = max(-d2, 0);
	min16float4 ne2 = max(-e2, 0);
	min16float4 nf2 = max(-f2, 0);
	min16float4 ng2 = max(-g2, 0);
	min16float4 nh2 = max(-h2, 0);
	min16float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);
	
	min16float4 a3 = tex7.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d3 = tex7.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e3 = tex7.SampleLevel(sam, pos, 0);
	min16float4 f3 = tex7.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i3 = tex7.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na3 = max(-a3, 0);
	min16float4 nb3 = max(-b3, 0);
	min16float4 nc3 = max(-c3, 0);
	min16float4 nd3 = max(-d3, 0);
	min16float4 ne3 = max(-e3, 0);
	min16float4 nf3 = max(-f3, 0);
	min16float4 ng3 = max(-g3, 0);
	min16float4 nh3 = max(-h3, 0);
	min16float4 ni3 = max(-i3, 0);

	a3 = max(a3, 0);
	b3 = max(b3, 0);
	c3 = max(c3, 0);
	d3 = max(d3, 0);
	e3 = max(e3, 0);
	f3 = max(f3, 0);
	g3 = max(g3, 0);
	h3 = max(h3, 0);
	i3 = max(i3, 0);
	
	min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0);
	conv2d_1_tf = max(conv2d_1_tf, 0);

	min16float4 conv2d_5_tf = mul(a1, min16float4x4(0.001049049, 0.017747996, -0.067229465, -0.020442853, -0.04868684, 0.09733606, -0.07313501, 0.02070675, 0.01012683, -0.034293324, -0.026002094, 0.008298949, -0.045532364, -0.069049254, 0.109774776, -0.092840426));
	conv2d_5_tf += mul(b1, min16float4x4(0.3071666, 0.108723, -0.018787129, 0.17321438, -0.07934712, 0.11855833, -0.032467257, -0.048425578, -0.091413595, -0.08235019, -0.050003942, -0.007800964, -0.07821158, 0.120108165, -0.15341766, -0.04518874));
	conv2d_5_tf += mul(c1, min16float4x4(-0.0038995466, 0.059817232, -0.13333397, 0.022390908, -0.054531172, -0.12521502, 0.061349645, 0.08832908, 0.015541151, -0.005833245, -0.103023596, -0.031728514, -0.1393958, 0.12932369, -0.024058655, -0.02949061));
	conv2d_5_tf += mul(d1, min16float4x4(0.10676212, -0.0919305, -0.045313094, 0.036725752, -0.2360789, 0.08090541, 0.08044168, -0.088691026, 0.05462964, -0.047420587, 0.011766264, -0.044065233, -0.09330811, -0.04302891, -0.09276843, 0.01615573));
	conv2d_5_tf += mul(e1, min16float4x4(0.14728056, 0.014297587, 0.20523176, -0.016391741, -0.25267518, -0.09126818, 0.14681858, 0.0720258, -0.034471154, -0.103409246, 0.029827712, 0.09607032, -0.12944661, -0.09812552, 0.19399726, 0.18891408));
	conv2d_5_tf += mul(f1, min16float4x4(0.0793041, -0.18886381, -0.08229493, -0.13476922, -0.034637094, -0.06667868, 0.09988945, -0.08209682, -0.07416632, 0.10529841, -0.14161663, -0.088301264, 0.0029876695, 0.11381751, 0.083498895, 0.15414985));
	conv2d_5_tf += mul(g1, min16float4x4(0.20285544, -0.16456522, 0.06494461, -0.013555718, -0.07797077, -0.13418226, -0.0014035929, 0.056061633, -0.024789125, -0.053674392, 0.048963223, 0.121051155, 0.064334966, -0.0482476, 0.068401285, -0.07039275));
	conv2d_5_tf += mul(h1, min16float4x4(0.098433256, -0.3636959, 0.2678772, -0.046356395, -0.1771877, -0.017444499, -0.06527938, 0.073921666, -0.1880833, 0.1873346, 0.10331725, -0.05711381, 0.049431477, -0.047258172, 0.13095368, -0.35352108));
	conv2d_5_tf += mul(i1, min16float4x4(0.10444254, -0.16424808, -0.00615067, 0.1023235, -0.122729294, -0.2563471, 0.00030699265, 0.09230543, 0.07732433, -0.03397466, -0.03141724, 0.2431111, 0.009742008, -0.07286298, -0.015188814, 0.025636861));
	conv2d_5_tf += mul(a2, min16float4x4(-0.06326144, -0.045018848, -0.130233, -0.015639791, -0.015171213, -0.009451374, 0.06830251, 0.07718799, 0.009820809, -0.10778585, 0.011396909, -0.067577444, 0.16482629, 0.099055305, 0.0517957, 0.008594935));
	conv2d_5_tf += mul(b2, min16float4x4(-0.037354734, 0.09272911, -0.11168438, 0.1708543, -0.12653585, -0.042765, 0.008014873, 0.22469266, 0.019282004, 0.0041092015, -0.029787902, 0.025127187, -0.05086034, 0.0077483514, 0.010261478, 0.07023893));
	conv2d_5_tf += mul(c2, min16float4x4(0.055195954, 0.004654069, -0.02118881, -0.05352797, -0.021830624, -0.010750989, -0.032053873, 0.18029462, -0.0703946, 0.06940036, 0.011578795, 0.049051903, 0.12236165, 0.1469314, -0.04752202, -0.02873477));
	conv2d_5_tf += mul(d2, min16float4x4(0.11799468, -0.022473548, 0.0045530205, 0.0870364, 0.1895775, -0.041058388, 0.079169616, -0.08769193, -0.012526104, 0.03904729, 0.016011083, -0.010498281, 0.08499936, -0.050380737, 0.14939919, 0.009984251));
	conv2d_5_tf += mul(e2, min16float4x4(0.10477428, 0.16810521, -0.1314053, 0.084377944, 0.17922944, -0.304226, 0.25293878, -0.15422472, 0.20214307, 0.10322054, -0.13431601, 0.04898287, 0.09717359, -0.07664543, 0.14711176, 0.15777126));
	conv2d_5_tf += mul(f2, min16float4x4(-0.027849296, -0.107415505, -0.048003152, -0.14503942, 0.16935585, -0.11120448, 0.19879252, 0.25992575, 0.10300595, 0.044460453, 0.095423825, -0.0006854256, 0.04321415, -0.042708825, 0.02633511, -0.06220348));
	conv2d_5_tf += mul(g2, min16float4x4(-0.004024937, 0.05021026, -0.00765448, 0.18315202, -0.078596614, -0.022813313, 0.09930163, 0.08525698, -0.0024254897, -0.06150155, 0.12159309, 0.056743085, -0.19437842, 0.02563038, -0.14668292, -0.0805431));
	conv2d_5_tf += mul(h2, min16float4x4(0.037370156, -0.13586049, -0.11521326, -0.07453397, -0.025900846, -0.0823091, -0.14436729, 0.14114335, 0.055820756, 0.05531836, -0.1474026, 0.10203739, 0.053665128, 0.00896543, 0.13431323, -0.12663968));
	conv2d_5_tf += mul(i2, min16float4x4(-0.20144333, 0.05849729, 0.06303023, -0.17678042, 0.03238696, -0.19829398, 0.12956308, -0.20013878, -0.1353999, -0.001031907, 0.10556917, -0.14760506, 0.03315909, -0.10838441, 0.16175537, -0.001477876));
	conv2d_5_tf += mul(a3, min16float4x4(0.087629505, -0.05908092, 0.16011593, -0.11285914, -0.4358247, 0.18938082, -0.31105244, -0.3638732, -0.0069619874, 0.029419519, -0.2156866, -0.13693112, -0.113110565, -0.09906378, -0.11164287, -0.084068194));
	conv2d_5_tf += mul(b3, min16float4x4(0.072181284, 0.035425037, 0.028820323, 0.12786204, 0.37121117, -0.076626934, 0.058864776, -0.20865935, -0.0014984896, 0.05978116, 0.117927864, 0.013273026, 0.088378325, 0.13492325, 0.018144222, 0.22580223));
	conv2d_5_tf += mul(c3, min16float4x4(0.045780275, 0.13346507, 0.056960598, -0.0019664192, -0.24231891, -0.13189796, 0.11114239, -0.07587297, 0.03099761, 0.10284658, 0.094186746, 0.04669001, -0.20374449, -0.12047404, -0.10640337, -0.03541381));
	conv2d_5_tf += mul(d3, min16float4x4(0.14384045, 0.12343541, -0.029074568, 0.13204664, 0.18878254, 0.115503244, -0.20217639, 0.16410889, -0.79949176, 0.5460196, -0.09889672, 0.27109572, 0.10628155, 0.13510233, -0.20859608, -0.07706875));
	conv2d_5_tf += mul(e3, min16float4x4(-0.11215904, 0.08981538, -0.10094039, -0.054024383, 0.2652237, -0.2002571, -0.15960355, 0.032049023, 0.007806114, 0.10592316, -0.3487021, 0.048408728, 0.10263737, -0.026020324, 0.072276175, -0.1190967));
	conv2d_5_tf += mul(f3, min16float4x4(-0.03184955, -0.00798831, -0.028087616, -0.010780139, -0.05444991, 0.09402867, 0.30834422, 0.14518146, -0.010965188, 0.14643683, -0.02568113, 0.068982124, 0.044459574, -0.05092265, -0.0028792082, 0.17158687));
	conv2d_5_tf += mul(g3, min16float4x4(0.0869746, 0.15908171, -0.0033584125, 0.049515188, -0.15995023, 0.20953654, -0.16041277, -0.08435643, 0.42034048, 0.096904315, -0.1927207, -0.0792477, 0.078221194, -0.10053459, -0.17969237, 0.08374661));
	conv2d_5_tf += mul(h3, min16float4x4(0.10612468, -0.23303585, -0.08996894, 0.10191982, 0.10724305, 0.1258089, -0.08111434, 0.103680536, 0.00824538, 0.2173516, -0.601468, -0.17365147, -0.09311857, -0.045947216, 0.20118287, 0.00016345571));
	conv2d_5_tf += mul(i3, min16float4x4(-0.07453406, 0.02476293, -0.089717, -0.14455949, -0.1427004, -0.21921235, 0.1878364, -0.023677701, -0.29442346, 0.13739492, -0.10435927, -0.35067815, 0.00956389, 0.049088918, -0.055482347, 0.1527778));
	conv2d_5_tf += mul(na1, min16float4x4(-0.20727113, 0.23718962, 0.17435564, -0.017858913, -0.042935595, 0.1996666, -0.059547734, 0.09735509, 0.019539079, -0.012399102, 0.057370137, 0.027493393, -0.10042333, -0.07915818, 0.07218426, 0.1309558));
	conv2d_5_tf += mul(nb1, min16float4x4(-0.032295313, 0.07833535, 0.22808518, 0.012292011, 0.09856554, -0.01996994, -0.028461069, 0.029348027, -0.25023523, 0.21794361, 0.14906348, 0.039845698, -0.004544177, -0.031246802, 0.019103816, 0.07738693));
	conv2d_5_tf += mul(nc1, min16float4x4(-0.15647748, -0.048666175, -0.03838509, 0.22003315, 0.048363995, -0.077338494, 0.109276325, -0.000109877525, -0.10441263, 0.18494262, -0.08754767, 0.12850273, 0.03408794, 0.15086798, -0.19896401, 0.048397515));
	conv2d_5_tf += mul(nd1, min16float4x4(0.011858143, -0.121841036, 0.0048841173, -0.062427614, 0.14153655, 0.011297287, 0.12778129, 0.004588582, 0.021572713, 0.15850346, 0.06464319, 0.06260356, 0.0838926, 0.04272777, 0.0733926, -0.08732838));
	conv2d_5_tf += mul(ne1, min16float4x4(0.20364462, 0.15701732, 0.053049877, -0.46085536, -0.037331745, -0.05813282, 0.036300424, 0.05660442, 0.14007641, 0.12849629, 0.08266283, -0.07872285, 0.07497584, -0.102409676, -0.12487048, -0.06305082));
	conv2d_5_tf += mul(nf1, min16float4x4(0.26158065, -0.090300985, 0.3522249, 0.18087223, -0.06095069, -0.10725335, 0.285748, 0.15195337, -0.19382374, -0.11163994, -0.10937165, -0.05908017, 0.0042464877, -0.14594594, -0.16316739, -0.17099144));
	conv2d_5_tf += mul(ng1, min16float4x4(-0.10028552, -0.18077525, 0.29705408, 0.12354066, 0.0198171, -0.08987044, 0.26377577, 0.075702764, 0.06952089, 0.0049671913, -0.3116211, 0.017268507, 0.37579817, -0.037516277, -0.09738986, 0.0917646));
	conv2d_5_tf += mul(nh1, min16float4x4(0.17661515, -0.17850937, -0.0018308868, 0.18318558, -0.0013081668, -0.113424055, -0.22193146, 0.15262845, -0.13412614, -0.13704826, -0.22099695, 0.24989522, 0.0740908, -0.3789193, -0.05141985, 0.14818457));
	conv2d_5_tf += mul(ni1, min16float4x4(0.31471825, 0.16524819, 0.03326876, -0.14611365, -0.1191457, -0.06510173, -0.13893965, -0.33106923, 0.13048746, -0.527816, 0.01877066, 0.26005507, -0.06294366, -0.24761125, -0.102864824, 0.094261676));
	conv2d_5_tf += mul(na2, min16float4x4(0.023637002, -0.07186282, 0.0946568, 0.13016573, 0.27244806, -0.08329611, 0.049762517, 0.14729369, 0.15868294, 0.07715838, -0.039478883, -0.06753388, 0.13460182, -0.092146814, -0.11814287, 0.12007007));
	conv2d_5_tf += mul(nb2, min16float4x4(0.06190745, -0.023566067, 0.239366, -0.0068376404, -0.15343493, 0.043685004, -0.047154866, 0.06527902, 0.11998191, -0.2565534, -0.091910206, -0.24104144, -0.12814765, 0.18195467, 0.11766466, 0.06181653));
	conv2d_5_tf += mul(nc2, min16float4x4(-0.06866098, 0.11969287, 0.00997188, 0.09261804, -0.14177154, -0.0052282973, 0.008734555, -0.20822202, 0.0068409014, -0.00470473, 0.031823143, -0.0601048, 0.05632819, 0.01690721, 0.01305342, -0.05824624));
	conv2d_5_tf += mul(nd2, min16float4x4(0.20557542, -0.10924632, 0.012821291, -0.11472336, -0.012862975, -0.09720539, 0.016499901, 0.053605244, 0.2183789, -0.014083709, -0.052786104, -0.075659566, -0.15531872, -0.1454758, 0.032142643, 0.28776056));
	conv2d_5_tf += mul(ne2, min16float4x4(-0.09832725, 0.3388722, -0.092447765, -0.16408351, -0.2557467, 0.031259898, 0.12057204, -0.018744074, -0.46363798, 0.042668946, 0.06506717, -0.25751963, 0.043604825, 0.11740889, 0.07365291, -0.027296776));
	conv2d_5_tf += mul(nf2, min16float4x4(-0.060943104, -0.00371101, 0.13572243, 0.013030143, 0.01196217, -0.14187267, -0.016784329, -0.048273906, 0.2050283, -0.02000498, -0.069050424, -0.09851947, 0.028769497, 0.1289265, -0.0022706073, -0.00296877));
	conv2d_5_tf += mul(ng2, min16float4x4(-0.015049836, 0.01153945, -0.006021933, -0.022156725, -0.030286482, 0.24230544, 0.040056467, -0.021735856, 0.20740065, -0.08999259, 0.006861033, -0.104062624, 0.26829463, 0.051726963, -0.12235904, 0.19572715));
	conv2d_5_tf += mul(nh2, min16float4x4(0.12676726, 0.17367609, -0.03689342, -0.034580305, -0.006836569, -0.06386566, 0.30929026, 0.09361281, -0.06405332, 0.26401913, -0.33314535, -0.06335476, -0.10960964, 0.13062708, 0.058030583, -0.1269144));
	conv2d_5_tf += mul(ni2, min16float4x4(0.03625719, 0.07449099, 0.021113826, 0.008309737, -0.09200202, -0.13108951, -0.0054502958, 0.19819209, -0.24836262, 0.22340319, -0.06844758, -0.22940424, -0.03410828, 0.03854127, -0.050844472, 0.019776637));
	conv2d_5_tf += mul(na3, min16float4x4(0.014228765, -0.013087027, -0.18055649, 0.001141047, 0.14329694, -0.008534367, 0.006927009, -0.058499523, -0.030727612, -0.07256724, 0.0025644915, 0.007111054, 0.036673337, -0.026148604, 0.120233335, 0.110904366));
	conv2d_5_tf += mul(nb3, min16float4x4(-0.008129229, 0.047908727, -0.1769762, 0.013220415, 0.066762984, 0.06523022, -0.016525066, -0.014394631, -0.008272182, -0.029847749, -0.10351308, 0.036801845, 0.11523106, -0.055156656, 0.11873017, -0.128935));
	conv2d_5_tf += mul(nc3, min16float4x4(0.21848068, -0.002019241, -0.06304477, 0.026670042, 0.039536465, -0.14145948, -0.06304873, 0.023532849, -0.122648045, 0.036414735, -0.037745856, -7.688992e-06, 0.059370764, -0.015019475, -0.029084614, 0.015826277));
	conv2d_5_tf += mul(nd3, min16float4x4(-0.09427522, -0.001972529, -0.09509679, -0.104867265, 0.05705236, 0.00031401246, 0.096889675, 0.15868911, -0.033721585, 0.08299121, -0.095194876, -0.1062834, -0.029866459, -0.041780088, -0.023895228, -0.0026728562));
	conv2d_5_tf += mul(ne3, min16float4x4(-0.27093527, -0.026471421, 0.09702481, 0.036061123, -0.1268649, 0.099340335, 0.15685195, -0.070615016, -0.13991052, -0.04212775, 0.096722156, 0.056507673, 0.02626438, 0.030435594, -0.00033173471, -0.024930432));
	conv2d_5_tf += mul(nf3, min16float4x4(-0.21608484, 0.038410295, -0.10975598, 0.12944944, -0.034110125, 0.03908566, -0.030190451, 0.031670973, -0.018954927, 0.0726848, 0.023156218, 0.017966276, -0.09825987, 0.023912448, 0.07257811, -0.008502145));
	conv2d_5_tf += mul(ng3, min16float4x4(0.044695053, -0.046481512, -0.098602146, -0.13273694, -0.09406325, -0.0062411693, 0.10242225, 0.025881069, 0.061662897, 0.019632077, -0.069696225, -0.14693011, 0.034227923, 0.037439592, -0.17188378, -0.19963826));
	conv2d_5_tf += mul(nh3, min16float4x4(-0.25531536, -0.050288115, 0.11258405, -0.24783169, -0.034263797, 0.054084245, 0.119918555, -0.027509615, 0.10056127, -0.09610037, 0.16208062, 0.005269051, 0.08660796, 0.11050934, -0.012584769, -0.0040703616));
	conv2d_5_tf += mul(ni3, min16float4x4(0.07649277, 0.13011539, -0.052341804, 0.07836859, 0.18562089, 0.07701519, -0.15669914, 0.007145429, 0.018427812, -0.12513049, -0.03395353, 0.14632194, -0.108091615, -0.01585824, 0.0602756, -0.11572579));
	conv2d_5_tf += min16float4(0.028852103, -0.003142654, 0.019121574, 0.026819304);
	min16float4 nconv2d_5_tf = max(-conv2d_5_tf, 0);
	conv2d_5_tf = max(conv2d_5_tf, 0);
	
	min16float4 conv2d_4_tf = mul(a1, min16float4x4(-0.032557677, 0.15826401, -0.11499422, -0.08640765, -0.09198991, -0.007192731, 0.010252954, 0.023780089, 0.15372203, -0.0009684923, 0.051660325, 0.011104123, 0.023871671, 0.005045307, 0.10722681, 0.065446004));
	conv2d_4_tf += mul(b1, min16float4x4(0.04723326, 0.21495502, -0.4453857, -0.020825233, 0.021379868, 0.04798187, 0.11383445, 0.08597329, 0.05730255, -0.046370696, -0.105095126, -0.03220056, -0.10122536, 0.06955123, -0.023051325, -0.04296927));
	conv2d_4_tf += mul(c1, min16float4x4(-0.02551809, 0.16179861, -0.15771814, -0.00045056897, 0.05842655, 0.11279471, 0.08018674, 0.05973765, 0.044070918, 0.08054599, -0.070336945, 0.05499731, -0.039118823, 0.003635353, -0.019759493, -0.040480837));
	conv2d_4_tf += mul(d1, min16float4x4(-0.04707628, 0.040738698, -0.013698143, -0.047391538, 0.031729057, -0.01837267, -0.10985463, -0.0028168112, -0.03167109, 0.0007989082, 0.011234699, 0.06895626, -0.12226361, 0.016290974, -0.055669673, -0.17432979));
	conv2d_4_tf += mul(e1, min16float4x4(-0.05069543, 0.15450205, 0.06981913, -0.377529, -0.14111535, 0.124757245, 0.021858096, 0.044034548, -0.16741593, 0.09746289, -0.045757677, -0.11644043, -0.09906484, 0.19128124, 0.061969943, -0.14589702));
	conv2d_4_tf += mul(f1, min16float4x4(0.12177423, 0.077437244, 0.059054222, 0.14925033, 0.016682645, -0.004765056, -0.2194741, 0.11314126, 0.2384071, -0.12049565, 0.12753354, 0.19679058, 0.03558123, 0.018636368, -0.11018761, -0.027520377));
	conv2d_4_tf += mul(g1, min16float4x4(-0.03618456, -0.030103968, 0.02968891, -0.00393875, -0.07128213, 0.022181263, -0.08430743, -0.027601235, -0.09228556, 0.04661313, 0.054729965, 0.052708175, 0.050483003, -0.022951633, 0.099321984, -0.043519083));
	conv2d_4_tf += mul(h1, min16float4x4(0.034695346, 0.10380181, -0.043013666, 0.037639238, 0.118943654, 0.027931944, 0.07628075, -0.12427217, 0.14970858, -0.065848, 0.0030750742, 0.011039123, 0.27721024, -0.055808693, 0.25105593, -0.1825985));
	conv2d_4_tf += mul(i1, min16float4x4(0.03627934, -0.17293514, 0.09188732, 0.11569783, -0.035355445, -0.10536353, -0.0068529076, -0.0929389, 0.09053234, 0.05907859, 0.049182277, 0.15194432, -0.09835422, 0.00061943196, 0.066343345, -0.06307589));
	conv2d_4_tf += mul(a2, min16float4x4(0.10120336, -0.10855617, 0.13412404, -0.018874792, 0.037988223, 0.0957435, 0.015402347, -0.08589699, -0.07694196, -0.03258571, 0.064437136, -0.0495422, 0.24836332, -0.0041739377, 0.093993485, -0.0076778256));
	conv2d_4_tf += mul(b2, min16float4x4(-0.20205948, 0.035698004, 0.0120531265, 0.03971649, 0.07550046, 0.047750015, -0.049045984, 0.04001014, -0.030263485, -0.0030697742, 0.05283423, -0.00014085052, -0.062447365, -0.0503476, -0.085151225, -0.04436882));
	conv2d_4_tf += mul(c2, min16float4x4(0.1516312, -0.073820546, -0.01047401, 0.0002717457, -0.17057727, 0.20856272, -0.09357496, -0.17346743, -0.068092465, -0.023344085, -0.03279074, -0.077289, -0.09844614, -0.035491887, 0.048796505, -0.03633584));
	conv2d_4_tf += mul(d2, min16float4x4(0.0073127835, 0.041834716, 0.015633723, -0.042742077, 0.08359733, -0.13898548, 0.1343008, 0.04692816, 0.051663343, -0.1277769, 0.029269615, 0.021745533, 0.09920264, 0.032076713, -0.05319438, 0.040574815));
	conv2d_4_tf += mul(e2, min16float4x4(0.052737534, -0.02136074, -0.18437223, 0.030766862, 0.23291707, -0.010449272, 0.032748792, 0.1304141, 0.27302903, 0.008562884, 0.13475919, 0.044446316, -0.17819557, 0.08270108, 0.06075267, -0.112788476));
	conv2d_4_tf += mul(f2, min16float4x4(-0.093748294, -0.004655885, -0.044859763, -0.11719146, -0.4701752, 0.09076277, -0.2283514, -0.34524822, -0.11999304, -0.010338027, 0.026785752, 0.029790966, -0.0635327, -0.024085084, -0.12074973, 0.080456585));
	conv2d_4_tf += mul(g2, min16float4x4(-0.023425102, -0.105786875, 0.1220016, 0.017974272, -0.12736784, -0.050550908, -0.1985566, 0.09139255, -0.18943925, -0.0067088404, -0.15007311, -0.015332959, 0.16430685, 0.006736225, -0.009263825, -0.08230126));
	conv2d_4_tf += mul(h2, min16float4x4(-0.15165123, 0.057155497, -0.09756418, 0.0475568, -0.14430566, 0.05169595, -0.24240975, 0.061147846, 0.0017831615, 0.028189357, -0.12519005, 0.03604646, -0.0460214, 0.05936097, -0.0213775, -0.28192145));
	conv2d_4_tf += mul(i2, min16float4x4(-0.019390648, 0.005514995, -0.0024649797, 0.056670878, -0.10385216, -0.05531206, 0.23233996, -0.16394126, 0.1718211, -0.08723329, 0.08580946, -0.028214762, -0.060853615, 0.0458013, 0.106201656, 0.031685878));
	conv2d_4_tf += mul(a3, min16float4x4(-0.105268896, 0.0106684705, -0.10355101, -0.07401398, 0.12425712, -0.21308881, 0.05200582, -0.024954682, -0.1120292, 0.07799603, -0.031506516, 0.0031533986, -0.05264893, -0.11141642, 0.107277475, 0.049987797));
	conv2d_4_tf += mul(b3, min16float4x4(0.08439962, -0.14181082, -0.20358182, 0.09080642, -0.061622817, 0.24017061, -0.12030436, 0.17224449, -0.0220505, 0.20025904, 0.1032571, 0.032335218, -0.09232964, -0.06172056, -0.1011141, -0.07322099));
	conv2d_4_tf += mul(c3, min16float4x4(-0.10896482, 0.06107763, -0.100641444, -0.018832406, 0.020139545, -0.0037260412, -0.10512619, -0.24599148, 0.014342631, 0.056689363, -0.06662091, 0.03999069, 0.00824376, 0.030449467, 0.027041748, -0.056902107));
	conv2d_4_tf += mul(d3, min16float4x4(-0.18174766, 0.040627997, 0.1140224, -0.20088135, 0.07404639, 0.01215843, -0.050341435, -0.0011868333, -0.5206288, 0.53214884, -0.60289955, 0.25364086, -0.05814184, 0.21600877, 0.07475344, 0.0624221));
	conv2d_4_tf += mul(e3, min16float4x4(-0.07710521, 0.030054979, -0.28164682, -0.13994755, 0.028757188, 0.04356096, -0.14357159, 0.2761477, -0.5300268, 0.44994202, -0.15364286, -0.18580483, 0.084563375, -0.13093601, 0.08291044, 0.017790407));
	conv2d_4_tf += mul(f3, min16float4x4(0.013963807, 0.0032885068, 0.0069646467, 0.03777879, -0.30103573, -0.047965538, 0.057550967, -0.3402889, 0.0026557294, 0.2289777, 0.01937088, 0.18484715, 0.083694465, -0.056240357, -0.0023172104, -0.13328342));
	conv2d_4_tf += mul(g3, min16float4x4(-0.05847699, 0.06990862, -0.0076244893, 0.03992696, 0.088809974, -0.059422277, -0.10557949, 0.058280375, -0.37764055, -0.19777957, -0.86350954, -0.21546844, 0.21863134, -0.074350335, 0.039010234, -0.021216504));
	conv2d_4_tf += mul(h3, min16float4x4(-0.18698102, -0.024641648, -0.16558538, -0.06499548, 0.10435924, 0.0030438402, -0.021636335, 0.046050593, -0.22217542, -0.14033853, -0.21516539, -0.4834089, 0.061894827, -0.024107188, 0.045805957, 0.20019397));
	conv2d_4_tf += mul(i3, min16float4x4(-0.0657418, 0.074276686, -0.07074239, -0.0101531055, -0.17146541, -0.016556345, -0.16196094, -0.13551502, -0.017605018, 0.065230414, 0.10717515, 0.41153327, 0.07095331, -0.05611257, -0.09297768, -0.054604497));
	conv2d_4_tf += mul(na1, min16float4x4(-0.051999312, 0.28559515, -0.09147715, 0.04536181, 0.077552326, 0.052161235, 0.006652824, 0.12593806, -0.07654755, 0.056134425, 0.029163264, -0.05461885, 0.04772557, 0.14073811, 0.07795857, -0.0397234));
	conv2d_4_tf += mul(nb1, min16float4x4(-0.0698435, 0.17774913, -0.07301677, -0.14336437, -0.104051985, 0.14831689, 0.045199208, -0.1867252, 0.07530157, 0.12153924, 0.1397731, -0.026905237, 0.056165505, 0.21213025, 0.073159344, 0.03143804));
	conv2d_4_tf += mul(nc1, min16float4x4(0.029820994, -0.079599164, 0.12901585, 0.014192698, -0.0816397, 0.02425821, 0.10938256, 0.0077257096, -0.009784561, 0.20602871, -0.07226973, -0.16234052, 0.0064664064, -0.023469927, 0.0037447219, 0.015258041));
	conv2d_4_tf += mul(nd1, min16float4x4(-0.028296372, 0.23841251, 0.04076168, 0.061052933, -0.082375534, 0.11200519, 0.025308013, 0.1736187, 0.23024227, -0.004161287, 0.16408522, -0.0141539015, 0.01496407, -0.037708607, 0.15057993, 0.14573294));
	conv2d_4_tf += mul(ne1, min16float4x4(0.22485349, -0.2217838, -0.011602474, 0.22668324, 0.2172098, -0.21826234, -0.09506227, -0.06592076, 0.14401191, 0.014868243, 0.41509256, 0.2799861, 0.04998898, -0.121938676, -0.29612163, 0.16926381));
	conv2d_4_tf += mul(nf1, min16float4x4(0.009154201, -0.14300221, 0.0121250935, -0.049595118, -0.3256411, -0.07036471, -0.066481166, -0.32643607, 0.13287841, -0.096211806, -0.24969384, -0.36735064, -0.14625767, 0.07217462, 0.06205977, 0.13962744));
	conv2d_4_tf += mul(ng1, min16float4x4(0.10122661, -0.042678952, 0.08920629, -0.022906423, -0.048781462, 0.008094098, 0.16410494, 0.01511925, 0.009355741, -0.034123767, 0.06522056, -0.04114966, 0.025140515, -0.046565775, 0.18292467, 0.009392873));
	conv2d_4_tf += mul(nh1, min16float4x4(-0.06604219, -0.10034091, 0.10934946, 0.18707348, -0.19358878, 0.11417287, -0.024397675, 0.04772407, -0.10278711, -0.03847901, -0.025120566, 0.047323767, -0.26464674, 0.15394583, -0.042590924, -0.09511779));
	conv2d_4_tf += mul(ni1, min16float4x4(-0.13339657, 0.13506593, 0.011463314, 0.077461444, -0.022262955, 0.06132727, -0.113292165, -0.1987806, 0.0027555283, -0.016475892, 0.14219329, -0.211625, 0.11405046, -0.12044097, -0.088240534, 0.17436995));
	conv2d_4_tf += mul(na2, min16float4x4(-0.08783496, 0.06564822, -0.10796846, -0.13460107, 0.10140343, 0.08105866, 0.0040176474, -0.045305755, -0.09299188, -0.18928377, -0.099694185, 0.11314726, -0.018881949, 0.04591721, 0.117965475, -0.00035760578));
	conv2d_4_tf += mul(nb2, min16float4x4(0.043456256, 0.10901491, 0.010485461, -0.061420415, -0.04018357, 0.1689085, 0.015425885, 0.061508525, 0.069377325, -0.18156749, 0.19194232, -0.25884745, -0.036184482, -0.0069973134, 0.021037813, -0.08046543));
	conv2d_4_tf += mul(nc2, min16float4x4(-0.044377886, 0.18098527, -0.07314578, -0.00287104, 0.038114406, -0.044841792, -0.063126855, 0.19896339, -0.09739791, -0.24212237, 0.19623765, -0.06326722, 0.062247403, 0.054567214, 0.10500492, 0.04231698));
	conv2d_4_tf += mul(nd2, min16float4x4(0.12399143, -0.09728722, 0.06730315, -0.011540306, -0.116925925, 0.0074092527, 0.21276267, 0.068349704, -0.05713399, 0.17656437, -0.10295556, -0.12709019, 0.102335855, 0.2679535, -0.06597912, -0.022839248));
	conv2d_4_tf += mul(ne2, min16float4x4(0.1265364, 0.16177331, -0.075765, -0.06347739, -0.056721687, 0.18794554, 0.006572088, -0.00011200755, 0.05219661, 0.21530084, -0.101604566, 0.04750483, -0.09394214, -0.11256657, 0.11389309, -0.011598962));
	conv2d_4_tf += mul(nf2, min16float4x4(0.015922887, -0.046698473, 0.0130271325, -0.052948795, 0.16426764, 0.09934194, -0.07745314, 0.038738497, -0.040967297, 0.06423774, 0.034312535, -0.013723525, -0.0030767843, 0.041221425, 0.041528914, 0.027097305));
	conv2d_4_tf += mul(ng2, min16float4x4(-0.13077654, 0.046842843, 0.034140635, 0.10109363, 0.20840693, -0.012975956, -0.041564208, 0.009877259, -0.033334266, -0.106034294, 0.2507187, -0.01512933, -0.008589095, 0.1849223, -0.06436464, 0.087347835));
	conv2d_4_tf += mul(nh2, min16float4x4(0.13326278, -0.035467118, 0.12698379, -0.034838732, 0.023856519, 0.05274121, -0.09120117, 0.070493534, -0.14804247, 0.08772896, -0.1343374, -0.058013596, -0.1194792, -0.07288297, 0.074856065, 0.021033823));
	conv2d_4_tf += mul(ni2, min16float4x4(0.023594514, -0.018284807, -0.037060708, -0.06051526, 0.13681069, 0.09436225, -0.044987947, 0.21031074, -0.14567234, 0.04987286, -0.24576813, -0.091558464, 0.0040201824, -0.045261826, 0.050834723, 0.04080285));
	conv2d_4_tf += mul(na3, min16float4x4(-0.12843935, 0.11059404, 0.035774253, 0.016019672, 0.13419932, -0.082884714, 0.086934, -0.027470622, -0.0055711996, 0.14726739, 0.00025540774, -0.082832016, 0.015134819, -0.1869738, -0.15580305, 0.118347436));
	conv2d_4_tf += mul(nb3, min16float4x4(-0.03210018, -0.07439424, 0.09171389, 0.0061248797, -0.122092225, -0.0055175424, 0.060848907, 0.05447007, -0.1005626, -0.13843839, -0.11508479, 0.034595586, 0.16528612, 0.07630222, 0.10175574, -0.034656286));
	conv2d_4_tf += mul(nc3, min16float4x4(0.05687666, -0.1130296, -0.038044114, 0.1376985, 0.02434624, -0.21984427, -0.0038558878, -0.10872551, 0.00807944, 0.019718373, 0.07016335, 0.001672884, -0.051990695, -0.04958167, -0.036594924, -0.0008506928));
	conv2d_4_tf += mul(nd3, min16float4x4(-0.07842389, -0.0907049, 0.10945533, -0.14496571, 0.03524454, -0.12881151, -0.13281278, -0.023060825, -0.037150636, -0.0001619192, 0.07462792, 0.19251943, -0.048907887, -0.09152158, 0.077018015, -0.0076050037));
	conv2d_4_tf += mul(ne3, min16float4x4(-0.06379491, 0.22390717, -0.044009656, -0.19816853, -0.14713046, 0.114638254, -0.008227305, -0.014490413, 0.04359834, 0.10032826, -0.17928778, -0.13981889, -0.07729277, 0.11685862, 0.21970165, -0.09117455));
	conv2d_4_tf += mul(nf3, min16float4x4(0.21068226, 0.030921075, 0.109845765, 0.058498275, 0.015876649, -0.0067828237, -0.10064077, 0.13756661, 0.017506564, 0.041748323, 0.17195722, 0.012285508, -0.023290245, 0.07060226, 0.069730066, -0.018874977));
	conv2d_4_tf += mul(ng3, min16float4x4(0.19153018, -0.07691863, -0.03687873, -0.069982305, -0.097453654, 0.060358603, -0.030159682, -0.048520114, 0.12498585, -0.07376571, -0.01039302, -0.099845245, 0.00042995642, 0.035783857, -0.12854497, -0.024975097));
	conv2d_4_tf += mul(nh3, min16float4x4(0.11177764, -0.02895167, 0.09053559, -0.24130683, -0.09276382, 0.04739869, -0.005453787, 0.031923447, 0.089385964, -0.048109047, 0.061177306, 0.117845595, 0.014615613, 0.1153759, -0.0007218852, -0.10042441));
	conv2d_4_tf += mul(ni3, min16float4x4(0.041179586, 0.00042151578, 0.07818137, 0.06354339, 0.0049364083, -0.055836283, -0.0073542926, 0.047470722, -0.15328479, 0.03497268, -0.17375292, 0.0006636334, -0.043640774, -0.007737031, 0.10040319, -0.09145891));
	conv2d_4_tf += min16float4(-0.0542914, -0.045369092, 0.029350873, -0.018128533);
	tex8[gxy] = conv2d_4_tf;
	min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0);
	conv2d_4_tf = max(conv2d_4_tf, 0);

	min16float4 target = mul(e1, min16float4x4(0.15610647, -0.15150696, -0.076018915, 0.030773202, -0.13935511, 0.17644633, 0.028819937, 0.30125114, 0.38625193, 0.35517895, 0.0975343, 0.114022225, 0.25494647, -0.23291643, 0.29096943, 0.15063812));
	target += mul(e2, min16float4x4(-0.22949804, -0.1368772, -0.07729264, 0.08470473, -0.06426131, -0.0064847367, 0.08241476, -0.1476949, -0.13712044, -0.36110023, -0.081719294, 0.19409889, 0.05562042, 0.26609465, 0.020447321, 0.2567414));
	target += mul(e3, min16float4x4(0.03337578, 0.2905731, 0.21772428, -0.074480034, 0.071880735, 0.27764675, -0.17273173, -0.0037474795, -0.1842544, 0.21896398, -0.30134472, 0.1711769, 0.23913746, -0.0435854, -0.12745531, -0.050227556));
	target += mul(ne1, min16float4x4(0.34923258, -0.5455803, -0.2904644, -0.5446842, -0.040965725, -0.055288248, -0.50672686, -0.10309429, 0.045286313, -0.04284262, -0.19785875, -0.16594213, -0.10000842, 0.47245356, -0.32767087, 0.32854807));
	target += mul(ne2, min16float4x4(0.05952625, -0.062991776, 0.3438396, -0.08141334, -0.2488028, -0.04746144, 0.06563561, 0.45020792, -0.19996788, 0.015523991, -0.19214569, -0.24849077, -0.022107737, 0.28190804, 0.13384444, -0.12800638));
	target += mul(ne3, min16float4x4(-0.37812218, 0.09970516, 0.015231938, 0.07226164, -0.33720142, -0.05899804, -0.0025790115, -0.17770731, 0.111127384, 0.008749534, -0.09077738, -0.060420215, -0.10196339, 0.09641038, 0.25222716, 0.12781976));
	target += mul(conv2d_5_tf, min16float4x4(0.24168618, 0.18625724, -0.012904225, -0.011732107, 0.085045695, -0.4754185, 0.10896487, 0.09179793, -0.31662637, -0.117563, 0.5133052, -0.09457646, -0.15872721, -0.09779008, 0.56810176, 0.3339073));
	target += mul(nconv2d_5_tf, min16float4x4(-0.09105348, -0.17617023, -0.21897802, -0.14157395, 0.16165406, -0.46579927, 0.24905841, 0.11579037, 0.09073764, 0.36771873, -0.29340085, -0.04271419, -0.11684365, -0.17138094, 0.12188604, -0.14749436));
	target += mul(conv2d_1_tf, min16float4x4(0.10943254, -0.17193961, -0.07027378, -0.26047203, 0.04288517, 0.21311204, 0.03997142, -0.17006959, 0.16181368, 0.28361118, 0.26655135, -0.097007245, -0.15998597, -0.09568138, -0.27558687, -0.11706871));
	target += mul(nconv2d_1_tf, min16float4x4(0.365517, 0.5422966, -0.0013869518, 0.3447622, -0.25885904, -0.098901175, -0.048043057, 0.15867509, -0.12303401, -0.15362008, 0.270228, -0.2756776, -0.44207478, -0.0419657, 0.09387863, -0.07240854));
	target += mul(conv2d_4_tf, min16float4x4(0.15073416, -0.032387026, -0.039117433, -0.50999755, 0.073477276, -0.14495571, 0.15120687, -0.3443857, -0.29039595, -0.16189122, 0.14190345, -0.10934344, -0.21965231, -0.45768484, 0.11907852, 0.5091087));
	target += mul(nconv2d_4_tf, min16float4x4(0.23260471, 0.16441877, 0.16760987, 0.10740154, -0.21663232, -0.10124566, -0.20843595, 0.066555224, 0.24608357, 0.16345865, -0.11965141, 0.18451719, 0.41683537, -0.044497896, 0.39102596, -0.11944608));
	target += min16float4(-0.02423156, 0.015124756, -0.02608139, 0.030428935);
	tex1[gxy] = target;
	
	target = mul(e1, min16float4x4(-0.12407633, -0.027812717, 0.23094666, 0.060302667, -0.16624144, -0.0007371851, -0.28186718, 0.22369424, 0.022404855, 0.09096415, 0.0017822908, 0.336001, -0.09130467, 0.034111694, 0.19113103, -0.14513424));
	target += mul(e2, min16float4x4(-0.014768806, -0.31290373, 0.015769936, -0.13507901, -0.010203078, 0.4945444, -0.01088852, -0.1582938, -0.14903755, -0.1840089, -0.009966903, -0.19425109, -0.21303283, 0.26285252, -0.046254523, -0.15465552));
	target += mul(e3, min16float4x4(0.07533467, 0.26080438, 0.024856985, 0.34277654, -0.3129344, 0.30575162, 0.06931557, -0.044698272, 0.18042412, 0.45999247, -0.5192437, 0.022618707, -0.020097036, -0.27706465, -0.0050434433, -0.12770803));
	target += mul(ne1, min16float4x4(0.098648146, -0.21701503, 0.10266521, -0.085537605, 0.02402345, -0.28643832, 0.19378376, -0.12658586, 0.115897186, 0.01580828, 0.11827048, 0.29019687, -0.19341177, 0.09564265, 0.03476779, 0.11699004));
	target += mul(ne2, min16float4x4(0.058346223, 0.25530934, -0.026972264, 0.3190419, 0.12263199, 0.124316074, 0.04734691, 0.011293402, -0.17419139, -0.15893947, 0.093723476, 0.23282392, 0.19400646, -0.0533148, 0.026266033, 0.19663234));
	target += mul(ne3, min16float4x4(-0.06663804, 0.20435949, 0.044924624, -0.24982749, 0.20327586, 0.12442739, -0.3155765, -0.18541007, 0.18991531, -0.19276267, 0.21697456, 0.03178544, -0.3381796, -0.15325621, -0.25820518, -0.07297032));
	target += mul(conv2d_5_tf, min16float4x4(0.098007046, -0.17018083, 0.3390076, -0.2280134, 0.12989196, -0.044336785, -0.10702673, -0.37464848, 0.028437488, 0.24224928, -0.107826136, 0.0031239046, -0.34256136, -0.17936559, 0.091159485, -0.054418396));
	target += mul(nconv2d_5_tf, min16float4x4(0.053965975, -0.17428857, -0.43524495, -0.15119378, -0.25487635, 0.16371927, 0.1467712, -0.08216164, -0.5624722, -0.11886804, -0.058240388, 0.17669299, -0.15173754, 0.13094892, 0.39045286, -0.017048221));
	target += mul(conv2d_1_tf, min16float4x4(-0.15798661, -0.36355045, 0.1957264, -0.05392931, 0.098283805, 0.14677107, 0.16887192, -0.11125151, -0.113571666, 0.15960959, -0.09331763, -0.032195523, 0.17286941, 0.33965907, 0.09051416, -0.25542957));
	target += mul(nconv2d_1_tf, min16float4x4(0.16866244, 0.05636189, -0.100324616, 0.20495924, -0.102705345, -0.08387417, -0.09328024, 0.21541446, 0.1430065, 0.0308464, -0.0793588, -0.029477509, -0.28854427, -0.29555637, 0.33754608, -0.18144317));
	target += mul(conv2d_4_tf, min16float4x4(-0.11338383, 0.019528843, -0.24414338, -0.36290777, 0.54908705, -0.083018646, 0.007534378, -0.1406417, 0.37853354, 0.09911941, -0.047861155, -0.3186758, 0.2125856, -0.114667036, -0.07411896, 0.050717812));
	target += mul(nconv2d_4_tf, min16float4x4(0.2961511, 0.28937215, -0.36593223, -0.16141813, -0.087650776, -0.47516292, 0.0052091824, 0.033959586, -0.06072628, -0.0012637508, -0.037578013, -0.35235298, 0.11726439, 0.6064031, 0.34058803, 0.45300734));
	target += min16float4(-0.0038817346, -0.052502215, 0.008882693, -0.017785465);
	tex2[gxy] = target;
	
	target = mul(e1, min16float4x4(-0.21563801, -0.12204513, 0.31932783, 0.28290224, -0.17011476, -0.06448831, 0.004365267, -0.07169507, 0.21165244, -0.07712424, 0.14979824, 0.2240992, 0.48357385, -0.015724417, -0.3836641, 0.07599027));
	target += mul(e2, min16float4x4(-0.20743755, -0.119118474, 0.1009234, -0.2842955, -0.24531132, 0.062108602, 0.11733637, 0.06687575, -0.065953426, 0.15715389, 0.21475503, -0.1019138, 0.08085453, -0.24522887, -0.108375534, 0.29179853));
	target += mul(e3, min16float4x4(0.16713834, 0.030504826, -0.2423963, -0.41885766, -0.20249867, -0.061683156, -0.14999944, 0.54505223, 0.16486095, -0.023248592, -0.17566164, 0.089543514, -0.1884646, 0.15263423, 0.14438081, -0.21730141));
	target += mul(ne1, min16float4x4(0.37399703, 0.2731133, 0.11279373, 0.004775496, -0.19443156, -0.071899086, 0.17512012, -0.11265631, 0.01926881, -0.31321192, -0.32160205, -0.23714963, 0.097321026, 0.13937393, -0.28038052, -0.046872586));
	target += mul(ne2, min16float4x4(0.124041334, 0.083966166, 0.13945055, 0.087915726, 0.11154068, -0.09223973, -0.012948238, 0.16114026, 0.13717382, 0.11968761, 0.076536775, -0.15866219, -0.19017774, -0.11172013, 0.024816172, 0.096302085));
	target += mul(ne3, min16float4x4(0.081017025, -0.1537902, 0.193927, 0.22226687, 0.441012, 0.18478638, 0.30040395, 0.032401927, -0.13839063, 0.017778423, -0.42750338, -0.19760555, -0.21953818, -0.2148397, -0.084683254, 0.20916465));
	target += mul(conv2d_5_tf, min16float4x4(-0.3921892, 0.2123992, 0.14027761, 0.10175143, -0.11134986, -0.16432697, -0.1097465, -0.21807413, -0.09732297, -0.11108596, -0.39636138, -0.06654249, 0.18766358, -0.0061503067, 0.1286225, 0.2418667));
	target += mul(nconv2d_5_tf, min16float4x4(-0.0039234986, 0.17088562, 0.12906016, -0.13476452, -0.09124947, 0.3098052, 0.09895542, 0.18631962, -0.06776231, 0.19485205, 0.14722902, 0.32147923, -0.1811334, 0.15313488, 0.0796922, 0.0012897709));
	target += mul(conv2d_1_tf, min16float4x4(0.032229863, 0.025498863, 0.06695979, 0.019412167, -0.16543043, -0.12314033, 0.112201385, 0.16554663, 0.13644108, 0.3098045, 0.081390016, -0.006008416, -0.016406069, 0.22883923, 0.22282913, -0.13947442));
	target += mul(nconv2d_1_tf, min16float4x4(0.010251363, 0.08210024, -0.33465254, -0.012109372, 0.027115503, 0.1481351, -0.081793204, -0.20716506, 0.0056828605, -0.30995828, 0.11498873, 0.15678942, -0.061227474, -0.14681229, 0.1498136, 0.11219651));
	target += mul(conv2d_4_tf, min16float4x4(0.21796124, -0.12195326, 0.44734144, -0.124715045, -0.05986958, -0.25252253, -0.13802508, 0.16756216, 0.28327593, 0.38355786, -0.27178785, -0.19969118, -0.26010805, -0.074593216, 0.10679648, 0.15610766));
	target += mul(nconv2d_4_tf, min16float4x4(-0.07648412, -0.18866923, -0.2592641, 0.32486007, -0.6200149, 0.09312683, 0.42827863, -0.2703639, 0.08144911, -0.054994784, -0.24911343, 0.41974616, 0.036914464, -0.32325324, 0.012920313, -0.48379797));
	target += min16float4(-0.013587518, 0.049618572, -0.065549955, -0.007242324);
	tex3[gxy] = target;
}


//!PASS 4
//!DESC Conv-4x3x3x24, Conv-4x1x1x56
//!IN tex1, tex2, tex3, tex4, tex8
//!OUT tex9, tex5, tex6, tex7
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass4(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e1 = tex1.SampleLevel(sam, pos, 0);
	min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na1 = max(-a1, 0);
	min16float4 nb1 = max(-b1, 0);
	min16float4 nc1 = max(-c1, 0);
	min16float4 nd1 = max(-d1, 0);
	min16float4 ne1 = max(-e1, 0);
	min16float4 nf1 = max(-f1, 0);
	min16float4 ng1 = max(-g1, 0);
	min16float4 nh1 = max(-h1, 0);
	min16float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);
	
	min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e2 = tex2.SampleLevel(sam, pos, 0);
	min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na2 = max(-a2, 0);
	min16float4 nb2 = max(-b2, 0);
	min16float4 nc2 = max(-c2, 0);
	min16float4 nd2 = max(-d2, 0);
	min16float4 ne2 = max(-e2, 0);
	min16float4 nf2 = max(-f2, 0);
	min16float4 ng2 = max(-g2, 0);
	min16float4 nh2 = max(-h2, 0);
	min16float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);
	
	min16float4 a3 = tex3.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e3 = tex3.SampleLevel(sam, pos, 0);
	min16float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i3 = tex3.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na3 = max(-a3, 0);
	min16float4 nb3 = max(-b3, 0);
	min16float4 nc3 = max(-c3, 0);
	min16float4 nd3 = max(-d3, 0);
	min16float4 ne3 = max(-e3, 0);
	min16float4 nf3 = max(-f3, 0);
	min16float4 ng3 = max(-g3, 0);
	min16float4 nh3 = max(-h3, 0);
	min16float4 ni3 = max(-i3, 0);

	a3 = max(a3, 0);
	b3 = max(b3, 0);
	c3 = max(c3, 0);
	d3 = max(d3, 0);
	e3 = max(e3, 0);
	f3 = max(f3, 0);
	g3 = max(g3, 0);
	h3 = max(h3, 0);
	i3 = max(i3, 0);
	
	min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0);
	conv2d_1_tf = max(conv2d_1_tf, 0);
	
	min16float4 conv2d_4_tf = tex8.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0);
	conv2d_4_tf = max(conv2d_4_tf, 0);

	min16float4 conv2d_8_tf = mul(a1, min16float4x4(-0.11553467, -0.14921814, -0.085377395, 0.10231987, 0.08155549, 0.07075523, 0.012124212, 0.013545821, 0.103393115, -0.007523045, 0.060739517, -0.05890024, 0.21902815, 0.020522034, -0.1402768, 0.1280077));
	conv2d_8_tf += mul(b1, min16float4x4(0.051253397, -0.040904667, 0.1898603, -0.11879134, 0.069219105, -0.04280286, -0.022459755, 0.14305754, -0.063906856, 0.21501009, 0.0023572869, 0.09840124, -0.072510734, 0.057598237, 0.06159614, -0.13078417));
	conv2d_8_tf += mul(c1, min16float4x4(0.09612547, -0.11506342, -0.0017697238, 0.082210004, 0.14244868, -0.018724512, 0.12827, -0.011984352, 0.08484893, 0.0534688, 0.06480922, 0.14134778, 0.0876346, -0.010892883, 0.15595037, -0.040623467));
	conv2d_8_tf += mul(d1, min16float4x4(-0.15201004, 0.0093916925, 0.25506935, 0.003084567, -0.06869725, -0.27494308, 0.12937209, -0.12043822, -0.1918611, -0.09398222, 0.045312967, -0.1111442, -0.1376949, 0.0053297062, -0.28389412, -0.1396928));
	conv2d_8_tf += mul(e1, min16float4x4(-0.12742004, -0.23415208, 0.12804613, -0.1406368, 0.09349193, -0.12212758, -0.05245734, -0.39274624, 0.23036338, 0.04170077, -0.12391477, -0.00871988, 0.012228075, 0.31633002, -0.17377669, -0.124939));
	conv2d_8_tf += mul(f1, min16float4x4(-0.01582657, 0.018224325, -0.01147676, -0.09984998, -0.026615107, -0.21468964, 0.21078119, 0.19190042, -0.096901044, -0.041285027, -0.014912263, 0.17798825, 0.06570931, 0.09232608, -0.1068993, 0.089612365));
	conv2d_8_tf += mul(g1, min16float4x4(0.051225413, -0.07643113, 0.058832865, 0.083958775, 0.11160564, -0.14167392, -0.021870648, 0.10238029, 0.047018003, 0.11449065, 0.08001371, -0.06804109, 0.033969186, 0.10051381, -0.0008517809, -0.07459736));
	conv2d_8_tf += mul(h1, min16float4x4(0.09263853, -0.09833199, 0.042132426, -0.13103375, -0.03731804, -0.039324153, 0.10190401, 0.024146391, 0.110644914, -0.12685625, -0.12852249, 0.021824492, 0.0784485, 0.11471671, -0.09116125, 0.010305502));
	conv2d_8_tf += mul(i1, min16float4x4(0.005317984, -0.055282168, 0.09082919, -0.10774655, 0.21394931, 0.0045357225, -0.06699662, 0.2507622, 0.15671767, 0.11952803, -0.06123182, 0.13399701, 0.046645127, 0.0026899239, 0.022635492, 0.07161002));
	conv2d_8_tf += mul(a2, min16float4x4(-0.017425103, 0.12552156, -0.093341894, -0.071356304, 0.15947455, 0.24979044, -0.03843421, 0.14001197, 0.15455416, -0.05550835, -0.011375887, -0.07661705, -0.12418336, 0.056913756, 0.16633298, 0.11513766));
	conv2d_8_tf += mul(b2, min16float4x4(-0.08778774, 0.057353538, -0.092138395, 0.002837398, 0.22633068, 0.120333284, 0.09834124, 0.05738123, 0.059130516, 0.22035405, -0.024255643, 0.02477418, 0.04645929, 0.39426094, 0.276884, -0.01479481));
	conv2d_8_tf += mul(c2, min16float4x4(0.18796739, 0.083501674, 0.07283311, 0.06415875, -0.024382524, 0.04679669, -0.15093789, -0.22831221, 0.058881074, 0.16446854, -0.028955745, 0.1956661, 0.0516941, 0.16135721, 0.11951658, 0.10451706));
	conv2d_8_tf += mul(d2, min16float4x4(0.008279574, 0.23456147, -0.12539841, -0.17107405, 0.12736088, -0.028486755, -0.18606788, -0.15545112, -0.025036227, 0.028735701, 0.17332946, -0.1413287, 0.050435208, -0.07583189, 0.14276801, 0.08007638));
	conv2d_8_tf += mul(e2, min16float4x4(0.23048489, -0.045157567, -0.014840823, 0.041994587, -0.0002087858, -0.01711496, -0.08994919, -0.05393212, -0.048546836, 0.18694918, -0.014523763, -0.14133967, 0.02896907, 0.08478857, 0.020594146, -0.0013243662));
	conv2d_8_tf += mul(f2, min16float4x4(-0.1141037, -0.11394802, 0.11164606, 0.12330282, -0.044497687, -0.06207866, 0.08016056, 0.16055691, -0.062488995, 0.051081542, 0.086364634, 0.10802774, 0.16742289, -0.08850773, 0.26072827, -0.34441397));
	conv2d_8_tf += mul(g2, min16float4x4(0.06775539, -0.19385163, 0.12488108, 0.11025669, 0.028568348, 0.051090416, -0.15175076, -0.17447716, -0.14535129, -0.15599817, -0.10742375, 0.23767987, -0.071634814, -0.19241351, -0.052424364, 0.105806515));
	conv2d_8_tf += mul(h2, min16float4x4(-0.044398658, 0.0027700714, -0.22429284, 0.11238373, -0.081747256, -0.016608216, 0.012278578, 0.036800906, 0.015081323, 0.12504977, -0.05880422, -0.05670147, -0.051358018, 0.03139849, -0.0058919964, -0.029613987));
	conv2d_8_tf += mul(i2, min16float4x4(-0.05326926, -0.06667389, -0.15082167, 0.011100974, -0.17428419, 0.06436674, 0.12850241, 0.07432186, 0.08191501, 0.24600182, -0.085727975, -0.22370532, 0.15681425, -0.112885654, 0.10803866, 0.09235784));
	conv2d_8_tf += mul(a3, min16float4x4(-0.15705872, -0.1011224, 0.11024848, 0.100342564, -0.112648144, -0.18259776, -0.0134320175, -0.19909476, 0.09715426, 0.015931793, -0.13415024, -0.1476672, -0.07625902, 0.11680044, -0.02269237, 0.013758246));
	conv2d_8_tf += mul(b3, min16float4x4(-0.24389952, 0.1949585, -0.08155146, -0.14432955, 0.061777957, 0.0053770593, 0.11755161, -0.053200334, -0.18301581, -0.015372121, -0.10212801, 0.27215135, 0.089837484, 0.011281987, -0.1765269, 0.060139008));
	conv2d_8_tf += mul(c3, min16float4x4(0.1613523, -0.051561244, -0.08003759, -0.15677674, -0.010480271, -0.05442542, 0.03414788, -0.054194316, -0.087549254, 0.22978279, -0.0047125067, 0.16779551, 0.0654713, 0.055772237, -0.009877759, 0.04076752));
	conv2d_8_tf += mul(d3, min16float4x4(-0.018052207, -0.07168355, -0.1447087, 0.2920458, 0.1345294, -0.0847823, 0.0014948811, -0.10205125, -0.044011697, -0.16249846, -0.052916005, -0.0181699, -0.08360677, -0.06418388, -0.036664434, -0.15985154));
	conv2d_8_tf += mul(e3, min16float4x4(-0.0043584667, 0.1973149, 0.07195116, 0.07608803, -0.10798404, 0.11076036, 0.23318382, -0.23839737, -0.29880977, -0.03647466, -0.13977784, -0.27129006, 0.14539374, 0.003516734, -0.17389128, -0.14548092));
	conv2d_8_tf += mul(f3, min16float4x4(-0.039712217, -0.14402422, 0.115726, 0.026172435, 0.088555016, 0.07606563, 0.047167692, -0.048009936, -0.19357018, 0.01590195, -0.08144182, 0.11633417, 0.044445038, -0.038849603, 0.02644488, 0.12953997));
	conv2d_8_tf += mul(g3, min16float4x4(-0.2535649, -0.09789916, -0.059466388, -0.17749946, -0.024909042, 0.07494422, -0.0817595, 0.20722246, 0.049061295, -0.26182574, 0.11551785, -0.11284367, -0.19183765, -0.075118415, 0.023913708, -0.13905819));
	conv2d_8_tf += mul(h3, min16float4x4(-0.009345336, 0.06655174, -0.002273717, -0.06538255, -0.015212964, 0.039716627, -0.08802585, -0.112940565, 0.018324325, 0.24168438, -0.2545027, 0.025853468, -0.11133557, -0.028638441, 0.026320668, -0.09357033));
	conv2d_8_tf += mul(i3, min16float4x4(-0.23745783, -0.032814, 0.2784286, -0.04626241, -0.02654139, -0.018567635, -0.0013748549, -0.064650096, 0.08974625, 0.04735343, -0.027304498, 0.14134395, 0.009515457, -0.0011779714, -0.001755572, 0.008599811));
	conv2d_8_tf += mul(na1, min16float4x4(-0.053202473, -0.17543721, 0.03065013, -0.11342283, 0.13609491, 0.15735649, 0.040357295, -0.062337715, 0.060803644, -0.0032487542, -0.13659185, -0.09013045, -0.058906827, -0.116660595, 0.03664988, 0.059270184));
	conv2d_8_tf += mul(nb1, min16float4x4(0.21752366, -0.06447607, -0.083456226, -0.06617954, -0.013684511, -0.1191609, -0.2506009, -0.08164425, 0.1306491, 0.19933657, 0.13410534, 0.09191758, -0.039843913, -0.06834293, 0.08471115, -0.09353382));
	conv2d_8_tf += mul(nc1, min16float4x4(-0.027393917, -0.08497713, 0.26017472, 0.2136785, -0.1488196, -0.07492567, 0.14468898, 0.16119008, 0.0121641755, 0.22242029, -0.06302512, 0.062499605, 0.06213177, -0.09802615, -0.30932772, 0.011748043));
	conv2d_8_tf += mul(nd1, min16float4x4(0.1187535, 0.04582557, -0.12194581, -0.039476555, 0.20283094, -0.10453671, 0.09578921, -0.22217935, 0.2739068, 0.09089512, -0.3268319, 0.17347647, -0.08915248, -0.13531092, 0.14857613, -0.07792796));
	conv2d_8_tf += mul(ne1, min16float4x4(-0.082583435, 0.16037074, 0.034193352, -0.07133332, -0.0669728, -0.24518156, 0.11620159, -0.10171298, -0.03303509, -0.0028717325, 0.0760564, -0.07741538, 0.046745025, -0.25254723, -0.01662034, 0.055250034));
	conv2d_8_tf += mul(nf1, min16float4x4(0.12526712, -0.0023898773, -0.3012884, -0.047304068, -0.09815741, 0.013686822, -0.050375015, 0.14987841, -0.038195454, 0.040165856, 0.014663741, 0.16414583, -0.15489048, 0.0926139, -0.21309514, -0.1200608));
	conv2d_8_tf += mul(ng1, min16float4x4(-0.09133431, -0.16783749, -0.062135316, 0.018470682, 0.022288319, -0.02211177, 0.13391319, -0.18012549, 0.49915206, 0.13974468, -0.08988157, 0.12178317, 0.0401673, 0.053748768, 0.019889776, 0.03453906));
	conv2d_8_tf += mul(nh1, min16float4x4(0.14379664, 0.08435809, 0.036211815, 0.07440852, -0.06631962, -0.12839338, 0.14946012, -0.21335278, 0.34956563, 0.5433695, -0.2727362, -0.086059555, 0.15091617, -0.1394221, 0.19740397, 0.14155756));
	conv2d_8_tf += mul(ni1, min16float4x4(-0.020419724, 0.07860248, -0.25041556, 0.043661647, -0.018286234, -0.059268583, -0.018467212, 0.04894847, -0.06933085, 0.31178948, -0.11954371, -0.0636989, 0.07150373, -0.04530066, -0.0018285213, 0.019425247));
	conv2d_8_tf += mul(na2, min16float4x4(0.09962638, -0.17088315, -0.06602017, -0.06087763, -0.1418266, -0.13101861, -0.13441323, -0.246784, -0.11813881, -0.28987116, 0.0533919, 0.058272794, -0.005445841, 0.015091582, 0.20249642, -0.105762914));
	conv2d_8_tf += mul(nb2, min16float4x4(-0.21612363, -0.1450863, -0.23284402, 0.006895393, -0.017744822, -0.20156701, 0.012746878, 0.018686332, 0.07711055, -0.10632525, -0.12213612, 0.051344417, -0.0141962785, -0.08607468, -0.05173791, -0.012742015));
	conv2d_8_tf += mul(nc2, min16float4x4(-0.35659614, 0.06504701, 0.0072779786, 0.3384698, -0.14741105, -0.107767306, -0.14098823, 0.22308472, -0.08386747, 0.09358457, 0.052461777, 0.16237038, -0.0059022917, -0.088671595, 0.14027567, -0.04549793));
	conv2d_8_tf += mul(nd2, min16float4x4(-0.23274305, 0.087585405, -0.006931044, -0.23876844, 0.08388762, -0.3022666, -0.16896221, 0.06452799, 0.2715658, -0.10732195, -0.057401773, 0.11985068, -0.06397641, -0.04235397, -0.026778454, 0.21212392));
	conv2d_8_tf += mul(ne2, min16float4x4(0.0082654, 0.28741485, -0.14546123, 0.20393674, -0.02755474, -0.120006405, 0.3581759, 0.12956442, 0.009266114, 0.012998164, 0.032407217, 0.06048391, 0.041528724, -0.13716324, 0.10482829, 0.084386185));
	conv2d_8_tf += mul(nf2, min16float4x4(-0.11990044, 0.092382684, -0.27219963, 0.15899557, -0.001977273, 0.120091155, 0.046375066, -0.21674563, 0.055842437, 0.07407933, 0.123498544, -0.08587901, 0.06925744, -0.07803027, -0.18120557, -0.0013798468));
	conv2d_8_tf += mul(ng2, min16float4x4(-0.025172636, 0.0014970741, -0.12216828, -0.07777998, -0.11570999, -0.2672482, -0.04927161, 0.047932815, 0.017598571, 0.06150582, -0.006943665, 0.06608355, 0.09816235, -0.02132959, 0.022629065, -0.11914383));
	conv2d_8_tf += mul(nh2, min16float4x4(-0.03462315, 0.0662906, 0.043817297, -0.09336832, -0.02393236, 0.12857129, -0.08293834, -0.079446144, 0.07298153, -0.22665861, 0.19360217, -0.027094053, 0.067512356, 0.054872043, 0.07353051, -0.019753326));
	conv2d_8_tf += mul(ni2, min16float4x4(0.052837294, 0.122079946, 0.10026166, -0.16611442, -0.20202795, 0.10773466, 0.016957153, -0.06257964, 0.065463126, -0.0070094382, 0.0057103466, 0.0263681, -0.083057486, 0.011921135, 0.18715331, -0.009138652));
	conv2d_8_tf += mul(na3, min16float4x4(-0.039395697, 0.047360536, 0.08876623, -0.051131938, 0.079491556, -0.062068135, -0.11143306, -0.1600982, 0.1182525, 0.0990501, 0.032290936, 0.16515383, 0.048210137, 0.27581617, 0.2143776, -0.26727012));
	conv2d_8_tf += mul(nb3, min16float4x4(0.009885355, -0.10188308, 0.014354376, -0.07466153, -0.09686006, 0.03712243, -0.07547052, -0.2513815, -0.1224751, 0.28383356, -0.11245158, -0.0022227417, 0.10997654, -0.12797359, -0.026750803, -0.15781246));
	conv2d_8_tf += mul(nc3, min16float4x4(-0.03825075, 0.0119200265, 0.13641061, 0.08023444, -0.05399191, -0.029703232, 0.11449091, 0.104263976, 0.13190906, 0.03559845, 0.00035285854, -0.24578363, -0.030404888, 0.03632663, 0.2665158, 0.287037));
	conv2d_8_tf += mul(nd3, min16float4x4(0.19444078, 0.04411847, 0.10453107, 0.16204067, -0.10203096, -0.1057438, -0.10478279, -0.10320498, 0.0060342676, 0.20314808, -0.080608025, -0.13728383, 0.23798111, 0.03982377, 0.0018392511, -0.17587116));
	conv2d_8_tf += mul(ne3, min16float4x4(0.093861975, -0.037806403, -0.023811158, 0.08989214, 0.16903597, -0.11738837, 0.057141513, 0.03039443, 0.07186046, -0.16815007, 0.041725967, 0.023349155, -0.21743254, -0.054814734, 0.21988024, -0.19913116));
	conv2d_8_tf += mul(nf3, min16float4x4(-0.098907694, 0.12669978, -0.022410035, -0.09411821, -0.037412155, 0.04395231, -0.15797623, -0.14484851, -0.036790654, -0.038002916, 0.16846262, 0.21878582, -0.053109415, -0.03769754, -0.24775061, -0.010048842));
	conv2d_8_tf += mul(ng3, min16float4x4(-0.12894969, 0.0033566963, 0.030691003, 0.033040218, -0.08500356, -0.043196633, 0.06903723, -0.17297482, -0.102706455, 0.13380836, 0.20812829, -0.054975122, -0.058504406, -0.08924625, 0.0967954, -0.12462231));
	conv2d_8_tf += mul(nh3, min16float4x4(-0.020506827, 0.040906876, 0.15277289, -0.11496513, 0.19803853, 0.011656168, 0.0041951393, 0.16394733, -0.052599292, -0.2028797, -0.012671829, 0.12447954, -0.042609632, 0.18015629, -0.047704864, -0.20819715));
	conv2d_8_tf += mul(ni3, min16float4x4(-0.04611932, -0.04080319, 0.1732811, -0.16310379, -0.0759677, -0.012633483, -0.12658887, -0.10228954, 0.11699648, 0.020952728, -0.1922721, 0.079663426, -0.017287953, 0.050658427, -0.061943304, -0.26140955));
	conv2d_8_tf += min16float4(-0.020329567, 0.07771538, 0.06740593, -0.00038238944);
	min16float4 nconv2d_8_tf = max(-conv2d_8_tf, 0);
	conv2d_8_tf = max(conv2d_8_tf, 0);
	
	min16float4 conv2d_7_tf = mul(a1, min16float4x4(0.09670644, -0.04566203, -0.10664036, -0.11654977, 0.10353238, -0.026668113, -0.06772906, -0.058057647, -0.04721855, -0.019877478, -0.16225834, -0.18661498, -0.1137224, 0.01452415, 0.09002202, -0.07991262));
	conv2d_7_tf += mul(b1, min16float4x4(0.12247382, 0.10237518, 0.04044118, -0.04867563, 0.106729075, 0.19503647, -0.01294371, 0.12316606, 0.08497549, -0.01606401, 0.031219587, 0.1474753, -0.14370713, -0.24351072, -0.17444824, 0.12567697));
	conv2d_7_tf += mul(c1, min16float4x4(-0.05373204, -0.11406721, -0.04307548, -0.0011615923, 0.09172633, -0.034839034, 0.12179155, -0.032049768, -0.036665026, 0.02375685, 0.01977139, -0.115673535, -0.065757565, 0.12521514, 0.03739438, -0.012122441));
	conv2d_7_tf += mul(d1, min16float4x4(0.0037090098, -0.09165263, -0.22216173, -0.09436383, -0.018459387, 0.15764487, 0.106846556, -0.15703869, -0.1056327, 0.100443825, 0.15728104, -0.07118126, -0.071113996, 0.07175751, 0.1066827, 0.015554562));
	conv2d_7_tf += mul(e1, min16float4x4(-0.08138076, -0.005017353, 0.0024575114, -0.0280491, -0.1689416, -0.24320668, -0.07413122, -0.026848925, -0.17659375, 0.095876895, 0.1875987, -0.0052445224, 0.0041429237, -0.13173698, -0.21236134, 0.14331093));
	conv2d_7_tf += mul(f1, min16float4x4(-0.023982342, -0.028810123, -0.1591679, -0.02026218, -0.16651444, 0.050990265, -0.1640659, -0.109770395, -0.06517823, 0.06647583, 0.09519326, -0.14313333, 0.061294477, 0.066543005, 0.12260083, -0.1436599));
	conv2d_7_tf += mul(g1, min16float4x4(0.07363797, -0.07069135, -0.01332299, -0.1166729, -0.17299873, 0.10319499, 0.17256232, -0.15059224, 0.12490272, 0.03816397, -0.07081764, -0.0005555199, 0.009463498, -0.080442056, 0.05372971, -0.01984048));
	conv2d_7_tf += mul(h1, min16float4x4(0.07747191, 0.038767997, -0.042611655, -0.025650622, -0.20976418, 0.11478602, 0.05521954, 0.03552756, 0.012396808, 0.10836491, 0.01147957, 0.17223893, -0.09354668, -0.061399113, 0.03731426, -0.095968515));
	conv2d_7_tf += mul(i1, min16float4x4(0.0029518164, -0.07522048, -0.30731654, 0.14996396, -0.09563301, -0.1635997, 0.16482228, -0.33490175, 0.034455117, -0.124511935, 0.003454064, -0.011791387, -0.08124914, -0.020552732, 0.14202276, -0.053646516));
	conv2d_7_tf += mul(a2, min16float4x4(0.029005067, -0.019747132, 0.041804817, 0.10725602, 0.09535564, 0.17670439, 0.18999198, 0.06499296, 0.09519827, -0.09794806, 0.10868586, -0.038871128, -0.092565574, -0.018548176, 0.028203959, -0.050549477));
	conv2d_7_tf += mul(b2, min16float4x4(0.10629401, -0.01204608, -0.0766338, 0.112705976, -0.103695825, 0.10200874, 0.008448839, 0.017780313, -0.024469525, 0.1860687, 0.14225325, 0.15677285, -0.14190355, -0.22543404, 0.024092557, -0.2790124));
	conv2d_7_tf += mul(c2, min16float4x4(0.08649951, 0.040031336, -0.010628009, -0.04257323, 0.052871518, 0.06654039, -0.07866483, 0.09136843, -0.10960993, -0.029104995, 0.18752916, 0.022354944, -0.15167497, -0.04915799, -0.03720373, 0.18194139));
	conv2d_7_tf += mul(d2, min16float4x4(-0.029030664, 0.063362755, 0.010331715, 0.034228537, -0.010749333, 0.026652085, -0.06266523, -0.047827587, 0.19567958, -0.07156196, 0.080418445, 0.040099807, 0.06901692, -0.10262759, 0.10190994, 0.1662688));
	conv2d_7_tf += mul(e2, min16float4x4(-0.04938947, 0.20808902, -0.012551209, 0.13833791, -0.08467056, -0.06768094, -0.0035055066, 0.2141383, 0.011813273, -0.094283104, -0.11627318, 0.0035407832, -0.16360888, -0.04307167, 0.18481791, 0.07308102));
	conv2d_7_tf += mul(f2, min16float4x4(0.058353335, 0.09541393, 0.013101275, -0.081891365, 0.08742119, -0.005137093, 0.025961146, -0.037318625, -0.14933549, 0.06090928, 0.12738119, -0.10817076, -0.13165309, 0.16108744, -0.13503371, 0.15482368));
	conv2d_7_tf += mul(g2, min16float4x4(-0.034848627, -0.0430948, -0.048124265, -0.04486795, -0.035008915, 0.08321689, -0.04977505, 0.048597503, 0.020555262, -0.07508485, 0.20037362, 0.06753769, 0.058704067, -0.009009662, -0.05421176, 0.20524938));
	conv2d_7_tf += mul(h2, min16float4x4(-0.12115005, 0.045643892, 0.112293474, 0.022908293, 0.073470674, -0.067966096, -0.017103313, -0.13648018, -0.07021163, 0.031020392, -0.048876107, 0.10397969, -0.005251243, -0.2611716, -0.07903786, 0.3444416));
	conv2d_7_tf += mul(i2, min16float4x4(0.10680049, -0.09858707, -0.0010306702, 0.10842332, -0.09013634, 0.02091661, 0.22192872, -0.15876925, 0.035971455, -0.04786045, 0.009500665, 0.09247623, 0.013221849, 0.1912487, -0.12753724, -0.061068386));
	conv2d_7_tf += mul(a3, min16float4x4(-0.03980972, -0.1474463, 0.22852057, -0.030534718, 0.103116564, -0.024893943, 0.023735823, -0.19768827, -0.088497065, -0.20338957, -0.022078201, -0.058560856, 0.16291575, 0.014483492, -0.093514696, 0.14760342));
	conv2d_7_tf += mul(b3, min16float4x4(-0.09319041, 0.08757541, 0.024344994, -0.004351115, 0.0023287807, 0.036806494, -0.02552934, -0.06227957, -0.1354203, 0.0283256, 0.2185213, -0.087060206, -0.022696337, -0.16076073, -0.20330715, 0.036380492));
	conv2d_7_tf += mul(c3, min16float4x4(-0.041115735, -0.023528732, -0.10124798, 0.21328308, -0.009342506, 0.07328608, 0.009285847, -0.23402044, 0.13117228, 0.1009154, 0.18027642, 0.074597865, 0.09881346, -0.00081656995, -0.002189424, -0.105243));
	conv2d_7_tf += mul(d3, min16float4x4(0.11213601, -0.23114498, 0.10217712, -0.083360896, 0.07913656, -0.039601568, 0.11367716, -0.034739245, -0.14472133, -0.035573903, -0.35375246, 0.040547356, -0.1504422, -0.15183373, -0.08146184, -0.015926573));
	conv2d_7_tf += mul(e3, min16float4x4(0.007678496, 0.045396518, 0.067442104, 0.357935, 0.1795549, -0.028398065, 0.26147032, -0.22306849, -0.028738718, -0.10074325, -0.08521542, -0.020190565, -0.175108, -0.26179528, -0.1149573, 0.05406529));
	conv2d_7_tf += mul(f3, min16float4x4(0.030697253, 0.06005289, 0.024412693, -0.013535843, 0.030500244, 0.14023077, -0.047582973, 0.07610684, 0.0571624, 0.19386198, 0.021660715, 0.03154867, -0.03788935, -0.08817162, 0.0053847465, -0.015165054));
	conv2d_7_tf += mul(g3, min16float4x4(-0.26646808, -0.2275448, -0.0619738, 0.104571655, 0.024079306, 0.033514917, 0.016844772, -0.14415953, -0.01694689, -0.0072623887, -0.12263149, 0.030444223, -0.03220662, 0.022894913, 0.03112325, -0.036533017));
	conv2d_7_tf += mul(h3, min16float4x4(-0.15611476, -0.19298914, -0.17546865, -0.080604054, 0.07597506, 0.097353615, 0.029924694, -0.078176685, -0.12268953, -0.05687716, -0.05294087, -0.18172315, -0.0773961, 0.084935166, -0.009803619, 0.040560953));
	conv2d_7_tf += mul(i3, min16float4x4(-0.10773278, -0.0012994999, 0.004722267, -0.057820093, -0.10506255, 0.029771779, 0.015667265, 0.14186347, -0.108355746, -0.11185942, 0.022062123, -0.123649485, -0.0666645, -0.0107138315, -0.0130763, -0.046252076));
	conv2d_7_tf += mul(na1, min16float4x4(-0.031815648, -0.0084208995, -0.072824255, -0.1508182, -0.064399414, 0.021369422, -0.18965991, 0.03649226, 0.15370539, -0.117377125, 0.15578026, 0.15059558, 0.1423233, 0.013444947, -0.16911474, -0.21899599));
	conv2d_7_tf += mul(nb1, min16float4x4(-0.050074972, 0.06591971, -0.20185336, -0.19894198, -0.045794237, -0.09582899, 0.019117232, 0.054774716, 0.00469303, 0.08466791, -0.10310348, 0.03430011, -0.05189703, 0.08612288, -0.09612641, 0.15337339));
	conv2d_7_tf += mul(nc1, min16float4x4(-0.058103696, -0.13447452, -0.06501768, -0.08269111, -0.043869898, 0.0398948, 0.033771295, -0.021524182, 0.0027115596, -0.030671224, 0.045388903, 0.04590158, -0.26087472, -0.16301683, 0.03324832, 0.024285218));
	conv2d_7_tf += mul(nd1, min16float4x4(-0.051421262, 0.15028518, 0.06384462, -0.08590671, 0.101886876, -0.012882116, -0.051741008, 0.11888618, -0.15590154, -0.38625813, 0.042900138, 0.22492291, -0.09111901, -0.005388837, 0.051056426, 0.043860577));
	conv2d_7_tf += mul(ne1, min16float4x4(-0.079883516, 0.05735032, 0.10719803, 0.16519663, -0.11724404, 0.25990528, 0.012375103, -0.010302452, 0.49185735, 0.1696493, 0.060474537, 0.3722603, 0.014323083, -0.16412182, -0.059749532, -0.24289557));
	conv2d_7_tf += mul(nf1, min16float4x4(-0.034733526, -0.084441185, -0.04596736, -0.0042962483, -0.0392975, -0.11149175, 0.14051792, 0.0702665, 0.117540844, -0.102869704, 0.27858627, 0.069043316, 0.04871729, -0.24745311, -0.058776632, -0.0017110928));
	conv2d_7_tf += mul(ng1, min16float4x4(-0.06277427, 0.16004023, -0.11507597, 0.15097888, 0.027060283, 0.1953599, -0.0031669976, -0.0005737168, -0.19876455, -0.23691651, 0.17741823, -0.12453466, -0.040428206, -0.0018632353, 0.023173677, -0.076046385));
	conv2d_7_tf += mul(nh1, min16float4x4(0.13513252, 0.0295901, -0.006554118, 0.06786791, 0.15473233, 0.012762339, 0.1927368, -0.06255987, -0.30587965, -0.44131213, -0.086936355, 0.011615333, 0.097696826, 0.02502633, 0.08837973, -0.07914361));
	conv2d_7_tf += mul(ni1, min16float4x4(-0.013541286, -0.034861088, 0.052821327, 0.037984103, 0.04338181, -0.0133451065, 0.041617934, -0.034278907, -0.053211715, -0.16200064, 0.11068738, -0.0867221, 0.04498939, 0.045188803, -0.05908562, 0.081477076));
	conv2d_7_tf += mul(na2, min16float4x4(-0.15266198, 0.22576767, 0.030019565, -0.045541495, 0.04881405, 0.0142783765, -0.1529103, 0.18320109, -0.00480197, 0.094124764, -0.010995377, 0.01641767, -0.010706163, 0.100903675, 0.19038767, -0.18477328));
	conv2d_7_tf += mul(nb2, min16float4x4(0.008087569, 0.13434748, -0.32156894, 0.07736676, 0.10494717, -0.11782738, -0.0029439328, -0.09557844, 0.015514035, -0.089648925, -0.17554814, -0.14883392, -0.04063905, 0.050346915, -0.08932905, -0.010719376));
	conv2d_7_tf += mul(nc2, min16float4x4(-0.11777635, -0.33014166, 0.34624732, 0.11740032, 0.1543961, -0.019076902, -0.12216481, -0.017081184, -0.00078788324, 0.031078909, -0.028584918, -0.026835786, 0.091864, 0.05272115, -0.12571204, 0.008416047));
	conv2d_7_tf += mul(nd2, min16float4x4(-0.043549653, 0.1039711, -0.20336658, -0.010299696, -0.27827185, 0.019381372, -0.1632188, 0.077465065, 0.20229691, -0.069236994, 0.014810417, 0.22877559, 0.02143673, 0.17381601, 0.09082899, -0.053508762));
	conv2d_7_tf += mul(ne2, min16float4x4(0.2391153, -0.19723871, -0.25610062, 0.07108974, -0.03182384, 0.2192639, -0.09241812, 0.048452295, -0.021405702, -0.2554734, -0.1965786, 0.20361422, -0.14465299, 0.058985952, -0.025833346, -0.10550291));
	conv2d_7_tf += mul(nf2, min16float4x4(-0.39593056, -0.4537898, 0.023792682, 0.37393433, 0.041772638, -0.020854915, 0.050651625, 0.0766088, 0.23962118, -0.06411897, -0.106468715, 0.17854762, 0.03402648, 0.0236968, -0.033498786, -0.12094796));
	conv2d_7_tf += mul(ng2, min16float4x4(-0.2517486, -0.011749091, -0.08157814, 0.1392019, 0.042420883, -0.23219018, 0.05053571, 0.13250825, -0.050171047, 0.15462638, -0.043420136, -0.014093825, 0.16176236, -0.14638837, -0.0071619265, -0.055462677));
	conv2d_7_tf += mul(nh2, min16float4x4(-0.3264325, -0.30403548, -0.15088049, -0.010203428, -0.018360123, -0.060466267, -0.090672255, -0.13885537, -0.038393795, 0.20886149, -0.10593147, 0.017991208, 0.08373391, 0.20925963, 0.028997745, 0.06881825));
	conv2d_7_tf += mul(ni2, min16float4x4(0.19107129, -0.16896184, -0.12929466, 0.07562441, 0.064231046, 0.0864716, -0.03966105, 0.09153016, -0.0628452, -0.015886426, -0.07048391, -0.24076262, 0.011216516, 0.07708032, -0.03814493, 0.13395755));
	conv2d_7_tf += mul(na3, min16float4x4(-0.05879415, -0.019550052, -0.023919582, -0.11289196, -0.0064408537, 0.07402445, 0.058795378, 0.15885338, -0.043667927, 0.10769252, 0.030309072, 0.048533317, -0.2524471, 0.059829284, 0.0797783, -0.019442867));
	conv2d_7_tf += mul(nb3, min16float4x4(-0.0038486274, -0.04580634, 0.07400007, -0.031162377, 0.10273923, 0.008071164, 0.11991736, 0.026728682, 0.026876984, -0.07799812, 0.1297364, 0.14695424, -0.06859438, -0.10330936, -0.07446633, 0.02616857));
	conv2d_7_tf += mul(nc3, min16float4x4(-0.16036308, 0.04957999, 0.01030331, -0.1962486, 0.103015296, -0.007340536, -0.049429756, 0.07165493, 0.008103339, 0.083655335, 0.098038, -0.1358248, -0.25885662, 0.029940864, -0.008321852, 0.2294651));
	conv2d_7_tf += mul(nd3, min16float4x4(-0.06087098, 0.00019651231, 0.03534409, 0.03318348, -0.0879954, 0.034764756, -0.30367124, -0.09713905, -0.026543869, -0.089636214, 0.12096616, -0.034594636, 0.054902434, -0.09290082, -0.07779638, -0.0821119));
	conv2d_7_tf += mul(ne3, min16float4x4(0.13779263, 0.18896884, -0.076830864, -0.09442952, -0.23735744, -0.014474691, 0.009051341, 0.10342686, 0.041046456, -0.10701024, -0.18442988, 0.02789949, -0.00074035715, -0.025513707, -0.040514592, 0.036068246));
	conv2d_7_tf += mul(nf3, min16float4x4(-0.048401676, 0.20745294, 0.0070508514, -0.0705337, -0.022934115, -0.043547787, 0.04628692, -0.07658743, -0.10154497, -0.13417569, -0.0013773212, 0.14263885, -0.07437275, -0.13121726, 0.12632057, 0.034687687));
	conv2d_7_tf += mul(ng3, min16float4x4(-0.027830327, -0.030560987, 0.12718935, -0.102934904, -0.02562363, 0.008175067, -0.0028858446, -0.015783066, 0.15272577, 0.10772941, 0.043485314, 0.014232708, 0.08577555, -0.16121073, 0.026591625, -0.055126593));
	conv2d_7_tf += mul(nh3, min16float4x4(-0.06485661, -0.11781964, -0.1421969, -0.16376711, 0.18121801, 0.123108625, -0.15428194, -0.06915854, 0.05089843, 0.08377868, 0.09607435, -0.02494757, -0.076740764, -0.19782536, -0.3470603, 0.037040427));
	conv2d_7_tf += mul(ni3, min16float4x4(0.10614744, 0.09086957, -0.02948694, 0.017862784, 0.027194018, 0.069870904, -0.021802098, 0.21401364, 0.11846571, -0.056183722, -0.071595654, 0.029162262, -0.124404505, -0.072095454, 0.040073395, -0.02816261));
	conv2d_7_tf += min16float4(-0.034254678, 0.047492404, -0.00038721046, -0.00072104816);
	tex9[gxy] = conv2d_7_tf;
	min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0);
	conv2d_7_tf = max(conv2d_7_tf, 0);

	min16float4 target = mul(e1, min16float4x4(-0.20878315, 0.073090814, 0.34913197, 0.04554434, -0.3036766, 0.04255219, 0.060676616, 0.24025755, -0.019680336, -0.15252031, -0.03416314, -0.072506554, 0.013241457, -0.10496547, 0.050562985, -0.033250205));
	target += mul(e2, min16float4x4(-0.18049034, 0.09664636, 0.41482204, 0.23575203, -0.05704124, -0.044852983, 0.1783455, -0.017561441, -0.06852369, 0.014129533, -0.21115111, -0.22699773, 0.38242704, 0.01165174, 0.04190493, -0.2141891));
	target += mul(e3, min16float4x4(-0.011946614, -0.16289592, 0.041371312, 0.40975794, 0.0041022287, -0.23657559, 0.10817027, -0.26924378, -0.12006245, 0.26678962, 0.072988346, -0.2085322, 0.0048250603, 0.12894252, 0.07966851, 0.24471562));
	target += mul(ne1, min16float4x4(0.18590502, 0.0845459, -0.12875262, 0.26096, 0.029233042, 0.36381075, 0.117661506, 0.006412487, 0.20946807, 0.07426911, 0.029169528, 0.0654646, 0.16450708, 0.12593012, -0.109644994, 0.14572893));
	target += mul(ne2, min16float4x4(0.1973355, -0.2275125, -0.28223652, 0.31719315, 0.3813502, 0.2693579, -0.037815563, -0.16148391, 0.12829015, -0.0030689894, 0.022164742, 0.035949815, -0.3378249, -0.13235879, 0.15883659, -0.17731927));
	target += mul(ne3, min16float4x4(-0.2885664, 0.14904943, -0.19845994, 0.23251331, -0.30293494, 0.02003626, 0.20378608, 0.27291408, -0.16427508, -0.1587996, -0.22501752, -0.04937006, -0.115756296, 0.09290222, -0.26140857, -0.014537909));
	target += mul(conv2d_8_tf, min16float4x4(-0.1513065, -0.31879196, -0.2727547, -0.4583672, 0.3103975, -0.09158548, 0.009788355, -0.09834531, 0.011489709, 0.042706747, 0.37254226, 0.15954055, 0.2172001, 0.09373807, 0.29088458, -0.35286763));
	target += mul(nconv2d_8_tf, min16float4x4(0.23374696, 0.33407655, 0.23616461, -0.09521148, -0.14927168, 0.11939751, 0.42869845, -0.16612507, -0.2706815, 0.16172597, -0.5814591, -0.11577833, 0.065650895, -0.3334003, -0.41168052, 0.32357255));
	target += mul(conv2d_1_tf, min16float4x4(0.3248823, -0.27207342, -0.048840526, -0.217887, -0.018053366, -0.24292938, 0.1603505, 0.06505262, -0.010766065, 0.07076721, 0.22251016, -0.041497335, -0.09878612, 0.2061045, 0.080330074, -0.029014835));
	target += mul(nconv2d_1_tf, min16float4x4(-0.26376098, -0.04971863, -0.03045489, 0.009807002, 0.11108562, 0.0693266, 0.15279642, -0.1372833, 0.18326105, -0.059612468, -0.005589879, 0.021735538, -0.027800532, -0.14984077, -0.116767704, -0.06531209));
	target += mul(conv2d_4_tf, min16float4x4(0.19206688, 0.21824414, 0.03791829, 0.22117318, 0.01257811, -0.044042267, 0.25616458, 0.082941554, -0.1181948, -0.17940602, -0.20808466, -0.06987383, 0.0019713745, -0.1609917, 0.153718, -0.32214788));
	target += mul(nconv2d_4_tf, min16float4x4(-0.19472712, -0.007020553, -0.36049378, -0.24589752, -0.011828978, 0.38882232, -0.3257698, 0.08382738, -0.09556564, -0.20949766, -0.32732338, 0.08303877, -0.107999764, 0.2836336, -0.0661124, 0.24043255));
	target += mul(conv2d_7_tf, min16float4x4(-0.1972939, 0.12734106, -0.09953153, -0.45152718, -0.15855458, 0.08746372, 0.11452114, 0.030538268, 0.11946308, 0.17044471, -0.24375156, -0.10093911, 0.19120134, -0.14312318, -0.14860255, -0.1223525));
	target += mul(nconv2d_7_tf, min16float4x4(0.14979935, -0.3136038, -0.25878516, 0.12995318, -0.075706124, -0.104598634, 0.1455947, -0.6167443, 0.06843719, -0.16347055, 0.04413483, 0.08870554, -0.29839858, 0.07214889, 0.049274225, -0.15555117));
	target += min16float4(-0.004266169, -0.020547107, -0.0031655694, 0.0643683);
	tex5[gxy] = target;
	
	target = mul(e1, min16float4x4(0.06760422, 0.16268754, -0.14517367, -0.023386402, -0.23272006, 0.48739922, 0.06399116, -0.032946702, -0.17306012, 0.334446, 0.17779559, -0.2660973, -0.3468709, 0.51220256, -0.010311926, -0.040047005));
	target += mul(e2, min16float4x4(-0.0538168, -0.048309397, 0.064760834, 0.09675621, 0.20269404, -0.2615111, -0.27282992, -0.12584937, 0.10904846, -0.15973651, -0.076846495, -0.09462694, 0.12722874, 0.21629119, -0.35314724, -0.086036965));
	target += mul(e3, min16float4x4(-0.049174394, -0.05765949, 0.21250841, 0.17151582, 0.15764381, 0.040890984, 0.05118504, -0.14658877, 0.05469671, 0.13701054, 0.20377803, -0.39008877, -0.0016028697, 0.13317284, -0.11653242, 0.12591232));
	target += mul(ne1, min16float4x4(0.21234287, -0.3048995, -0.12653783, -0.109162085, -0.050768167, -0.17156011, 0.05592974, 0.27197394, -0.19419932, -0.046344608, -0.05445905, -0.13253787, 0.05778321, 0.16979085, -0.04466505, -0.06867837));
	target += mul(ne2, min16float4x4(-0.18974759, 0.22814974, -0.007522141, -0.10096491, -0.26759568, 0.32048568, 0.2660603, 0.112091035, 0.41875598, -0.1051111, 0.06525224, 0.27191457, 0.017352497, -0.31743342, 0.29108858, 0.26573792));
	target += mul(ne3, min16float4x4(0.031855166, -0.122523904, -0.28207538, 0.12833035, -0.025733596, 0.008542537, -0.1891138, 0.16361842, 0.058317598, -0.007289248, 0.03349703, -0.038986582, 0.18147361, -0.3912238, 0.024964351, 0.14339498));
	target += mul(conv2d_8_tf, min16float4x4(0.37369347, -0.012460246, -0.037854888, 0.067713045, -0.06288331, 0.26436228, -0.058873445, 0.04463945, -0.04286497, -0.04824939, 0.17835206, -0.036378298, 0.33058742, -0.14685723, 0.1025378, 0.051385757));
	target += mul(nconv2d_8_tf, min16float4x4(-0.131484, -0.040644694, -0.14443769, 0.1950223, 0.09507341, 0.48859578, -0.26267928, 0.24538381, -0.063596986, -0.18749404, -0.031884808, -0.07132067, -0.04606875, 0.03708701, -0.26145473, 0.2371378));
	target += mul(conv2d_1_tf, min16float4x4(0.094301306, -0.08795415, -0.035933804, 0.21765485, -0.29858732, 0.11440603, 0.14095801, 0.18262209, -0.08135902, -0.45404965, 0.20399955, -0.06393024, 0.023793167, 0.16001467, -0.11817577, -0.16322103));
	target += mul(nconv2d_1_tf, min16float4x4(0.07168084, 0.0879652, -0.083207026, -0.045181375, 0.07845201, -0.15828066, 0.05710845, 0.05699917, -0.061211787, 0.039662443, 0.036026876, 0.14224064, -0.23701179, 0.01259322, -0.091701694, 0.42408752));
	target += mul(conv2d_4_tf, min16float4x4(0.017442457, -0.1311232, -0.22520894, -0.049517628, -0.20945188, -0.035541452, -0.13055338, -0.04001523, -0.09402065, -0.19641486, -0.10066238, 0.115912616, -0.10684873, 0.02787531, 0.28450257, 0.02690632));
	target += mul(nconv2d_4_tf, min16float4x4(-0.2659566, 0.43625832, -0.0695883, -0.2624756, -0.2827253, -0.22893822, 0.26025924, 0.24121284, 0.2272709, 0.2178127, -0.15199527, 0.32607552, 0.005909836, 0.056527212, 0.19446251, -0.010751997));
	target += mul(conv2d_7_tf, min16float4x4(0.1273358, -0.28996274, -0.19322409, 0.018734567, 0.48555133, -0.17389202, 0.13595583, 0.46163267, -0.08973322, -0.30239192, 0.49897516, 0.021815563, -0.2589829, 0.0039008032, 0.056682784, 0.048075546));
	target += mul(nconv2d_7_tf, min16float4x4(0.415353, 0.112207405, 0.20997275, 0.033321556, -0.1327579, 0.12338585, 0.61820966, -0.3411527, 0.018252999, 0.05708125, -0.24571265, 0.11019793, 0.24145919, 0.20340635, -0.0693869, 0.16271423));
	target += min16float4(-0.07107039, 0.0061239223, 0.0013546069, 0.02994767);
	tex6[gxy] = target;
	
	target = mul(e1, min16float4x4(0.0014731521, -0.15165007, 0.04889816, -0.23228844, 0.11362322, 0.07071926, -0.23770805, -0.04347728, -0.16787082, -0.008313435, -0.42370048, 0.08681679, 0.10611205, -0.012660734, 0.10022364, 0.027629996));
	target += mul(e2, min16float4x4(-0.35393402, 0.018436229, 0.10629333, 0.029471794, -0.21129252, -0.301571, 0.0045201713, -0.15636055, 0.298371, 0.11426107, 0.018450111, -0.13657977, 0.22216578, 0.009629214, 0.5373198, 0.30699998));
	target += mul(e3, min16float4x4(-0.1504586, -0.16447587, -0.2739809, -0.14074785, 0.39510623, -0.08384201, 0.14561974, -0.43195033, -0.055713434, 0.12800978, 0.2829296, -0.23494978, 0.14326042, -0.09509476, -0.3169162, 0.124649614));
	target += mul(ne1, min16float4x4(-0.23705968, 0.15959233, 0.11467344, 0.15141489, -0.096755706, 0.023953263, 0.13856179, 0.024189185, 0.13272291, 0.46271062, 0.55494446, -0.14286532, 0.1501738, 0.28827608, 0.058801714, 0.029045105));
	target += mul(ne2, min16float4x4(-0.002308931, 0.07281086, -0.5197955, 0.079986535, 0.38919175, 0.3164044, 0.35857818, 0.09364757, 0.17373051, -0.1447216, -0.05244769, 0.15533692, 0.046295535, -0.19459103, -0.33215967, -0.15369573));
	target += mul(ne3, min16float4x4(0.11478203, -0.29375935, -0.19501545, -0.081721894, -0.103483915, 0.041965716, 0.056954723, 0.19596405, -0.13819647, 0.010641367, -0.11124998, -0.08675409, 0.036859434, 0.23720297, 0.14129876, -0.044769786));
	target += mul(conv2d_8_tf, min16float4x4(0.08397742, -0.12651941, 0.17676216, -0.084249385, 0.36716628, 0.039452277, -0.27606088, -0.36796048, 0.31680533, 0.14186403, 0.4466997, 0.13315229, 0.011085958, -0.17513317, 0.13940759, 0.27495402));
	target += mul(nconv2d_8_tf, min16float4x4(-0.1870658, 0.18817395, 0.010469263, -0.39973256, -0.57167524, -0.38714117, -0.26255277, 0.14361858, 0.018649995, 0.15935089, -0.21745402, -0.0056655053, -0.15408997, -0.03154883, -0.29631105, 0.27472818));
	target += mul(conv2d_1_tf, min16float4x4(-0.07735958, 0.042861674, 0.36729267, -0.2362879, -0.15516327, -0.009109079, 0.063800156, -0.253287, 0.4471074, 0.0944695, -0.26948866, -0.07759066, 0.045151226, -0.13749917, 0.14566323, -0.13593693));
	target += mul(nconv2d_1_tf, min16float4x4(0.28955856, 0.09293573, 0.07423561, 0.1616493, 0.22285056, 0.01639275, 0.026332684, -0.14958683, -0.32087958, -0.3138252, -0.17335242, -0.38171476, -0.25562596, -0.022701526, 0.17425084, -0.042576227));
	target += mul(conv2d_4_tf, min16float4x4(0.24964347, -0.07078707, 0.18416835, -0.054758202, -0.061644293, -0.0964391, 0.14583856, -0.34874785, -0.3402768, 0.14743538, 0.36047265, 0.04471611, 0.015971184, 0.25227246, -0.011749087, -0.18359871));
	target += mul(nconv2d_4_tf, min16float4x4(-0.059328917, -0.07904788, -0.23883855, -0.06956805, -0.040810965, 0.09536262, 0.0018617791, -0.1898438, 0.1794419, 0.11382087, -0.16192305, 0.22020166, 0.03995484, -0.19086155, -0.2970539, 0.14597812));
	target += mul(conv2d_7_tf, min16float4x4(-0.034995254, 0.060782332, -0.0519364, 0.41303346, -0.06989344, 0.21384521, 0.31474474, 0.12592849, 0.17633408, -0.2764535, 0.36884397, -0.015302021, 0.02951528, 0.094452016, 0.13392285, 0.14435606));
	target += mul(nconv2d_7_tf, min16float4x4(0.13522784, 0.101011604, 0.04657966, -0.043399148, 0.008192044, 0.0027336285, 0.011269824, 0.09976881, -0.026473437, -0.124423906, -0.19602631, -0.09871594, -0.10603456, 0.057509303, -0.09007557, -0.14438893));
	target += min16float4(-0.07283617, -0.09245546, -0.006695486, -0.013076421);
	tex7[gxy] = target;
}


//!PASS 5
//!DESC Conv-4x3x3x24, Conv-4x1x1x64
//!IN tex5, tex6, tex7, tex4, tex8, tex9
//!OUT tex10, tex11, tex1, tex2, tex3
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass5(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

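	// 3x3 sampling neighborhood centered on the current texel (e).
	// Columns are x offsets of -1, 0, +1 and rows are y offsets of -1, 0, +1,
	// each in units of inputPt:
	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]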
	min16float4 a1 = tex5.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c1 = tex5.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d1 = tex5.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e1 = tex5.SampleLevel(sam, pos, 0);
	min16float4 f1 = tex5.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h1 = tex5.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i1 = tex5.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na1 = max(-a1, 0);
	min16float4 nb1 = max(-b1, 0);
	min16float4 nc1 = max(-c1, 0);
	min16float4 nd1 = max(-d1, 0);
	min16float4 ne1 = max(-e1, 0);
	min16float4 nf1 = max(-f1, 0);
	min16float4 ng1 = max(-g1, 0);
	min16float4 nh1 = max(-h1, 0);
	min16float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);
	
	min16float4 a2 = tex6.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c2 = tex6.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d2 = tex6.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e2 = tex6.SampleLevel(sam, pos, 0);
	min16float4 f2 = tex6.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h2 = tex6.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i2 = tex6.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na2 = max(-a2, 0);
	min16float4 nb2 = max(-b2, 0);
	min16float4 nc2 = max(-c2, 0);
	min16float4 nd2 = max(-d2, 0);
	min16float4 ne2 = max(-e2, 0);
	min16float4 nf2 = max(-f2, 0);
	min16float4 ng2 = max(-g2, 0);
	min16float4 nh2 = max(-h2, 0);
	min16float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);
	
	min16float4 a3 = tex7.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c3 = tex7.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d3 = tex7.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e3 = tex7.SampleLevel(sam, pos, 0);
	min16float4 f3 = tex7.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h3 = tex7.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i3 = tex7.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na3 = max(-a3, 0);
	min16float4 nb3 = max(-b3, 0);
	min16float4 nc3 = max(-c3, 0);
	min16float4 nd3 = max(-d3, 0);
	min16float4 ne3 = max(-e3, 0);
	min16float4 nf3 = max(-f3, 0);
	min16float4 ng3 = max(-g3, 0);
	min16float4 nh3 = max(-h3, 0);
	min16float4 ni3 = max(-i3, 0);

	a3 = max(a3, 0);
	b3 = max(b3, 0);
	c3 = max(c3, 0);
	d3 = max(d3, 0);
	e3 = max(e3, 0);
	f3 = max(f3, 0);
	g3 = max(g3, 0);
	h3 = max(h3, 0);
	i3 = max(i3, 0);
	
	min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0);
	conv2d_1_tf = max(conv2d_1_tf, 0);
	
	min16float4 conv2d_4_tf = tex8.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0);
	conv2d_4_tf = max(conv2d_4_tf, 0);
	
	min16float4 conv2d_7_tf = tex9.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0);
	conv2d_7_tf = max(conv2d_7_tf, 0);

	min16float4 conv2d_11_tf = mul(a1, min16float4x4(-0.47819614, -0.0145807015, -0.14235033, -0.06459091, 0.051679384, -0.24727756, 0.16531977, 0.23668537, -0.044610042, -0.03163047, -0.024059737, 0.21251118, -0.02900184, -0.11218355, 0.085020125, -0.08413842));
	conv2d_11_tf += mul(b1, min16float4x4(-0.083133794, 0.08406656, 0.20940667, 0.21155417, -0.12855776, -0.061865382, 0.2486309, 0.13191856, -0.028019775, 0.11366226, 0.13459402, 0.18391807, -0.09688631, 0.011591497, -0.2466206, -0.02237942));
	conv2d_11_tf += mul(c1, min16float4x4(0.0037495645, 0.019915475, 0.07625364, 0.07119373, 0.18423386, 0.07686032, -0.013689673, -0.11513128, -0.12845139, 0.273121, 0.077030145, 0.13114497, 0.04543684, 0.09308563, 0.19357756, 0.24509594));
	conv2d_11_tf += mul(d1, min16float4x4(0.03458686, -0.42040396, -0.104271114, 0.1918791, -0.25708342, 0.03583752, 0.2589993, -0.050576445, 0.0043004244, 0.19324894, 0.080590524, 0.14723596, 0.041485116, 0.13033897, 0.28028202, -0.058933));
	conv2d_11_tf += mul(e1, min16float4x4(-0.09480703, -0.13742156, -0.30406207, -0.03582789, 0.12367775, 0.064455606, -0.061555192, 0.06453598, 0.0917327, 0.04270991, 0.21958654, 0.13570474, -0.12048236, -0.024039079, 0.11226094, 0.050419748));
	conv2d_11_tf += mul(f1, min16float4x4(0.02062305, -0.10862912, 0.12883052, 0.18951532, -0.03850205, 0.11498875, 0.16137509, 0.009759631, -0.09211893, 0.0708826, 0.15651149, 0.19246778, 0.056577608, -0.0871854, 0.090261444, -0.019027064));
	conv2d_11_tf += mul(g1, min16float4x4(0.2780629, -0.054287303, -0.13351089, 0.021154758, -0.12753387, 0.031183334, 0.29430825, -0.06750467, -0.044209514, -0.042159047, -0.12532234, 0.006239919, -0.1961551, 0.099502094, 0.11470277, 0.10832906));
	conv2d_11_tf += mul(h1, min16float4x4(-0.2063426, -0.24898255, -0.28594568, 0.15958025, 0.03609107, 0.06394462, 0.022269696, -0.058725126, -0.104284525, 0.16744058, -0.14197277, -0.0051877275, -0.14164501, 0.021519974, -0.12835859, -0.12090698));
	conv2d_11_tf += mul(i1, min16float4x4(-0.45966595, 0.015630098, -0.3182287, 0.10282032, 0.14680836, -0.23460387, 0.15356645, 0.017346757, 0.05120857, -0.035891768, -0.092325106, 0.005394217, -0.09328155, -0.012819384, 0.14028293, 0.012717323));
	conv2d_11_tf += mul(a2, min16float4x4(0.06736054, -0.0044496846, -0.061849196, -0.04067691, -0.06897966, 0.12449442, -0.2508966, -0.090206414, -0.0938398, 0.013633642, 0.1409954, -0.08719504, -0.06788997, 0.098299906, 0.06095718, -0.071988545));
	conv2d_11_tf += mul(b2, min16float4x4(-0.036788728, 0.0037640312, 0.0037646547, -0.021026969, 0.09899778, -0.054118365, 0.08308994, 0.10520542, -0.2592658, 0.113168575, -0.15985844, -0.15588784, -0.114530176, 0.0118468655, -0.08904175, -0.106764145));
	conv2d_11_tf += mul(c2, min16float4x4(-0.027948795, 0.004584627, -0.03517112, -0.0007581547, -0.025537577, -0.035759352, 0.0973176, 0.03644148, -0.16327894, -0.12705119, -0.028998915, 0.123633325, -0.19453679, 0.113648765, 0.012692621, 0.057508085));
	conv2d_11_tf += mul(d2, min16float4x4(-0.010877041, -0.03980561, 0.013339347, 0.061969575, 0.2810196, 0.058558464, -0.1776418, 0.16630451, 0.05817873, 0.07262613, -0.03700459, -0.04399585, -0.16587572, 0.08260915, -0.009857085, 0.04391152));
	conv2d_11_tf += mul(e2, min16float4x4(0.13224548, -0.074267186, 0.03054752, -0.07024831, 0.074185595, -0.02313642, 0.065747924, -0.004960654, 0.14500527, 0.04731656, -0.117002204, -0.08217113, -0.07336124, -0.084052, 0.12326484, 0.09011222));
	conv2d_11_tf += mul(f2, min16float4x4(-0.16047195, 0.076803066, -0.2786948, 0.1176941, -0.0131406775, 0.009936233, 0.1374073, -0.09565009, 0.10070214, 0.11528786, -0.0730813, -0.13431457, -0.21621323, -0.013119195, -0.18385343, -0.058667593));
	conv2d_11_tf += mul(g2, min16float4x4(0.0040132185, 0.134705, -0.011815555, -0.094924495, -0.05727005, 0.0950522, 0.04084158, 0.016365912, -0.02917897, -0.03862751, 0.012003192, 0.03835569, -0.096041076, 0.004406702, -0.10389978, -0.0009610953));
	conv2d_11_tf += mul(h2, min16float4x4(-0.10157398, -0.10319637, -0.012073916, -0.19039184, -0.02369365, -0.021698838, 0.015538155, -0.051231697, 0.041044284, -0.02691978, -0.1713024, -0.12904704, -0.03471921, 0.037977315, 0.064845525, 0.1264632));
	conv2d_11_tf += mul(i2, min16float4x4(-0.08406344, 0.044064984, 0.056877784, -0.13283873, -0.0058603142, 0.075482026, -0.09246969, -0.065718174, -0.025745329, 0.015633717, -0.06059284, 0.08149079, -0.022848418, 0.061127402, 0.047879003, 0.04544503));
	conv2d_11_tf += mul(a3, min16float4x4(-0.21875143, 0.053516608, 0.04243476, -0.08509983, 0.406294, -0.060116358, -0.13793904, -0.1559247, -0.017128536, 0.021633752, 0.08865264, -0.032922007, 0.08250139, -0.17353764, -0.16137601, 0.12943612));
	conv2d_11_tf += mul(b3, min16float4x4(-0.27127337, -0.057137657, 0.005031509, 0.10027777, 0.20500132, 0.0073007634, -0.09760265, -0.2428409, -0.16160156, 0.32289484, -0.096351616, -0.15562637, -0.24892123, 0.13368145, 0.31498823, 0.09549184));
	conv2d_11_tf += mul(c3, min16float4x4(0.01444343, 0.07115736, -0.17920075, -0.024846312, 0.057884447, 0.14358939, 0.043788955, -0.013016863, 0.087220736, -0.0060180086, 0.19609165, -0.025888423, -0.06294847, 0.03406598, -0.04250465, -0.04808649));
	conv2d_11_tf += mul(d3, min16float4x4(-0.02662509, 0.24295834, -0.07612864, -0.20615683, 0.16377121, -0.05186765, -0.1750536, -0.04726876, 0.29443663, 0.0983683, -0.18610948, -0.1949004, -0.1446201, 0.11045659, 0.013536009, 0.18919495));
	conv2d_11_tf += mul(e3, min16float4x4(-0.080005094, -0.29404542, 0.14548069, 0.013500291, -0.011395713, -0.048017073, -0.053436857, -0.13627477, 0.041908856, -0.30820572, 0.17044339, 0.22999896, -0.32405153, 0.12114645, -0.080108374, -0.06520369));
	conv2d_11_tf += mul(f3, min16float4x4(0.062350888, 0.170049, 0.04211445, 0.12288375, 0.010835714, -0.17722476, -0.18930283, 0.11607083, -0.054421842, -0.004191082, 0.14655825, -0.1229237, -0.058039404, 0.09008831, -0.017603457, 0.027497675));
	conv2d_11_tf += mul(g3, min16float4x4(0.24208143, 0.04073837, -0.014191606, -0.069054805, 0.33024073, 0.25458166, -0.062864356, 0.028975246, 0.17692459, 0.22093695, -0.17666881, -0.03709188, -0.2001521, -0.06491504, 0.199202, 0.08666711));
	conv2d_11_tf += mul(h3, min16float4x4(0.053933676, -0.058177974, 0.006558046, -0.018798346, -0.05610966, 0.21288905, -0.06513558, -0.012686734, 0.11635233, -0.039428618, 0.21562201, -0.07206132, 0.065123, -0.056875434, 0.08877115, -0.10216625));
	conv2d_11_tf += mul(i3, min16float4x4(-0.03294463, 0.011720216, 0.056083966, -0.05530083, -0.16838011, -0.0026962461, -0.17402422, -0.009680605, -0.0064969915, 0.14410603, 0.090527765, 0.048180934, -0.06482277, -0.17573984, 0.36281663, 0.14240478));
	conv2d_11_tf += mul(na1, min16float4x4(0.026252843, 0.01621395, -0.03018171, 0.20843759, -0.05987382, -0.13891932, 0.008612968, -0.03674587, 0.055803657, -0.020272622, -0.12338887, -0.21429133, -0.026188683, -0.08283737, -0.07952566, 0.11333926));
	conv2d_11_tf += mul(nb1, min16float4x4(-0.03251504, -0.04554576, 0.012727539, 0.06115098, -0.23113467, -0.21784578, 0.10390341, -0.028863542, 0.1405748, -0.092941806, 0.04094931, 0.26037696, 0.014778488, -0.0012763811, 0.120576814, 0.017626097));
	conv2d_11_tf += mul(nc1, min16float4x4(-0.18005073, 0.08914073, -0.19792715, 0.07666369, -0.040389247, 0.06043132, -0.068735644, 0.006061951, -0.09742132, -0.015570641, -0.05810036, -0.06305046, 0.06286483, -0.1669205, -0.15426171, 0.046022687));
	conv2d_11_tf += mul(nd1, min16float4x4(-0.045976873, 0.028456753, 0.037186757, 0.05231241, -0.12909305, -0.16277504, -0.0035813665, -0.06294949, -0.04205357, -0.15816367, -0.021810539, -0.108161986, -0.08399507, -0.12965044, -0.00611913, -0.029711436));
	conv2d_11_tf += mul(ne1, min16float4x4(0.2537032, -0.018604688, 0.16584206, -0.20883793, -0.10245589, -0.06570063, -0.16321684, 0.02899805, -0.1427425, 0.20915249, -0.1761724, -0.09594, -0.10995607, -0.11155546, 0.037878104, 0.028106442));
	conv2d_11_tf += mul(nf1, min16float4x4(-0.1628865, -0.17466225, -0.14372015, 0.05667306, 0.10472602, -0.018716356, 0.087850116, -0.056246866, 0.083403885, -0.082255535, -0.10299376, -0.1840543, -0.35220358, -0.059505656, -0.21391232, 0.16591822));
	conv2d_11_tf += mul(ng1, min16float4x4(0.040541083, -0.1146205, -0.021495365, -0.033008795, 0.007970957, 0.007984478, 0.02606323, 0.012668774, 0.12771203, -0.09947922, -0.14149466, -0.1890857, -0.14682727, 0.033072542, -0.11833484, -0.038956877));
	conv2d_11_tf += mul(nh1, min16float4x4(-0.14274059, 0.08827524, 0.011712704, 0.10902492, 0.060481314, 0.003578728, 0.029129535, 0.08889746, -0.09685511, -0.095264345, -0.13920794, -0.11014531, -0.05436568, 0.060371455, 0.07251505, 0.20626338));
	conv2d_11_tf += mul(ni1, min16float4x4(-0.07604635, -0.035359483, 0.010230144, 0.030468917, -0.008423673, 0.0273416, -0.10538517, 0.10806335, 0.03605524, -0.082360476, -0.06390322, -0.19094782, -0.10980772, 0.13070256, -0.009116851, 0.094997086));
	conv2d_11_tf += mul(na2, min16float4x4(0.06696349, 0.02884076, -0.21400648, 0.10645195, -0.15960447, 0.07844191, 0.09057932, -0.022310507, -0.20641366, -0.20897295, 0.05159085, -0.042257026, 0.16398512, -0.22846761, -0.033591952, 0.3359712));
	conv2d_11_tf += mul(nb2, min16float4x4(-0.024236226, -0.13937415, 0.29392216, 0.075087205, 0.07763272, 0.27571923, -0.28625518, -0.37574485, -0.0041614594, 0.051519327, -0.1727601, -0.002199689, -0.32436445, 0.059740037, 0.006543187, 0.11488307));
	conv2d_11_tf += mul(nc2, min16float4x4(-0.025740145, 0.10688955, 0.3432225, 0.04467087, 0.033870216, 0.16714002, 0.20819634, -0.11762629, 0.19059974, 0.0661928, 0.022394795, -0.14459209, -0.16684553, 0.08020461, -0.37147745, 0.04065124));
	conv2d_11_tf += mul(nd2, min16float4x4(-0.006134667, -0.0031798254, -0.101459935, 0.15463492, 0.039860703, 0.077067874, 0.17671694, -0.06597644, -0.12203232, -0.058787927, 0.008942991, 0.0570718, -0.043793175, -0.06388724, 0.0247615, -0.09814649));
	conv2d_11_tf += mul(ne2, min16float4x4(0.009333359, -0.10666345, 0.19417302, -0.08021104, 0.071850464, 0.18651992, 0.1487532, 0.03132098, -0.21202543, 0.02972519, 0.028346745, 0.17178747, -0.24139602, -0.18386513, -0.03009887, -0.17363264));
	conv2d_11_tf += mul(nf2, min16float4x4(0.006349671, -0.0199598, 0.14889078, -0.14921328, -0.08713048, 0.14722322, 0.041971955, -0.019181551, 0.07069949, -0.12362262, 0.08554868, 0.16224997, -0.11218193, 0.3132043, -0.18114331, -0.104602315));
	conv2d_11_tf += mul(ng2, min16float4x4(0.047690846, -0.26872492, 0.2183612, 0.19340567, -0.06084255, 0.04798949, 0.19492827, 0.14699973, -0.07016259, 0.14654481, -0.06714773, 0.07936776, 0.073397264, -0.10646918, -0.13238135, 0.07208961));
	conv2d_11_tf += mul(nh2, min16float4x4(0.07382223, -0.044347115, -0.032497067, -0.02002406, 0.18200569, -0.09839878, -0.0027670355, -0.032592446, -0.05297432, 0.11200702, -0.019955616, 0.112369545, -0.2748285, -0.139697, -0.26332188, -0.303972));
	conv2d_11_tf += mul(ni2, min16float4x4(-0.23713836, -0.003925555, 0.16436225, 0.15221255, 0.1077621, -0.027760457, 0.0059113647, -0.11066059, -0.0980858, 0.011830199, 0.040253483, 0.06447465, -0.0827841, 0.04048125, 0.04551489, -0.12471252));
	conv2d_11_tf += mul(na3, min16float4x4(0.010833946, -0.058524415, -0.19618602, -0.11400699, -0.088038966, -0.08249501, 0.025192872, -0.04508469, -0.017629553, 0.10654934, 0.007814974, 0.041299284, 0.054442752, 0.14059617, 0.09760092, -0.060198124));
	conv2d_11_tf += mul(nb3, min16float4x4(-0.16173755, 0.14454803, -0.036523324, 0.016083395, -0.04597214, 0.019925527, 0.10551423, 0.07915449, -0.09191786, 0.040694106, 0.079085656, 0.04860138, -0.00920608, 0.015785221, 0.08149557, -0.070038155));
	conv2d_11_tf += mul(nc3, min16float4x4(0.09396598, -0.27780503, 0.057351794, 0.17856738, 0.06403465, -0.019479418, 0.13132542, 0.09766009, -0.13038878, 0.106342256, 0.19923963, -0.107940085, -0.11207263, 0.07427199, 0.122141175, -0.17083314));
	conv2d_11_tf += mul(nd3, min16float4x4(-0.0129763335, 0.029884486, -0.1591489, 0.05743726, -0.10154112, -0.05951815, 0.038755298, 0.31987077, 0.041023176, 0.15760195, 0.020455543, 0.117823385, 0.008611401, 0.10392111, -0.029049959, -0.00561999));
	conv2d_11_tf += mul(ne3, min16float4x4(0.11115114, 0.13910228, -0.15370879, 0.14353245, -0.106912665, 0.16457058, -0.0007093892, -0.16065751, 0.12172275, -0.0071658283, -0.13790236, -0.05790294, 0.0258849, 0.047155324, 0.028826248, 0.077854194));
	conv2d_11_tf += mul(nf3, min16float4x4(0.04222945, 0.016645031, -0.22052032, -0.108474314, -0.037527397, 0.1508435, 0.13960642, 0.051745985, 0.17182018, -0.0071819094, 0.13896792, 0.12522686, 0.1307583, 0.09315921, 0.031736225, -0.24318463));
	conv2d_11_tf += mul(ng3, min16float4x4(0.12233872, 0.16193391, -0.045825243, -0.021991767, -0.06857775, 0.019997157, 0.26207915, 0.017674582, 0.14816906, -0.011254348, 0.11932189, -0.06385669, -0.08113471, 0.13287768, -0.008416972, -0.039866585));
	conv2d_11_tf += mul(nh3, min16float4x4(0.15459004, -0.029546147, -0.20761466, -0.12011381, -0.09814943, -0.12983616, 0.0019625768, 0.086729765, 0.22380745, 0.112912305, -0.073421806, -0.061414655, -0.00015528004, -0.10514693, 0.0449276, 0.1197672));
	conv2d_11_tf += mul(ni3, min16float4x4(0.031599533, -0.0699447, 0.10802751, -0.011152619, 0.08078543, 0.10828058, 0.10941837, -0.07911565, 0.16324246, -0.034676578, 0.04017893, 0.01809475, -0.0054880627, 0.027349245, -0.041267768, 0.041391887));
	conv2d_11_tf += min16float4(-0.022754392, 0.009821446, 0.06426939, -0.052443504);
	tex10[gxy] = conv2d_11_tf;
	min16float4 nconv2d_11_tf = max(-conv2d_11_tf, 0);
	conv2d_11_tf = max(conv2d_11_tf, 0);
	
	min16float4 conv2d_10_tf = mul(a1, min16float4x4(0.31697825, -0.38101152, 0.26027805, 0.19195847, -0.15098146, 0.17915927, 0.263392, -0.108211316, 0.004631585, -0.06989657, 0.057514362, 0.013759571, -0.06416892, 0.033370133, -0.04808954, -0.1563251));
	conv2d_10_tf += mul(b1, min16float4x4(0.15827416, -0.17950794, 0.16834997, 0.13073751, 0.030396005, 0.040662624, 0.16062944, 0.041357074, -0.13926722, -0.06929913, 0.10808029, -0.06798461, 0.10745701, -0.102971874, -0.06641405, 0.0885879));
	conv2d_10_tf += mul(c1, min16float4x4(0.017569518, -0.074986644, -0.0381504, -0.108356364, -0.028105393, 0.107422166, 0.010693419, -0.03790183, -0.056355134, -0.17228265, 0.19153535, 0.014339309, -0.072250925, 0.25570604, 0.06766601, 0.10274542));
	conv2d_10_tf += mul(d1, min16float4x4(0.11808023, 0.03209569, -0.047605, 0.10232121, -0.089450955, 0.22296266, -0.031239472, 0.12547736, -0.13355453, 0.09658202, 0.14639929, 0.1722445, -0.16578807, -0.01587181, -0.06775275, 0.106690586));
	conv2d_10_tf += mul(e1, min16float4x4(-0.08015724, -0.09917064, 0.17005561, -0.11093009, -0.033904083, -0.18723048, -0.42410555, -0.34870258, -0.024956835, -0.057636626, -0.17249386, 0.3452565, -0.0781917, 0.048283495, -0.1849922, 0.10712763));
	conv2d_10_tf += mul(f1, min16float4x4(-0.19845031, 0.018594265, 0.11669769, 0.04427017, -0.13347605, 0.14735079, -0.20751207, -0.08490434, -0.077883884, -0.17200643, 0.03127422, 0.11106135, -0.04682848, -0.04392586, 0.11629085, -0.03191463));
	conv2d_10_tf += mul(g1, min16float4x4(-0.035416074, 0.032688126, -0.034218192, -0.35819814, -0.07167647, -0.032766674, -0.09849224, 0.27033108, -0.040135793, 0.11793038, 0.024326177, 0.056732934, 0.0072507905, -0.15076852, -0.007368895, -0.07758195));
	conv2d_10_tf += mul(h1, min16float4x4(0.03677586, 0.088763975, 0.04954433, -0.047844727, -0.07487822, -0.06698103, 0.12568145, -0.22909173, 0.1671084, -0.17893419, 0.09722236, 0.20345661, 0.057767022, 0.044742733, 0.06905004, -0.010992711));
	conv2d_10_tf += mul(i1, min16float4x4(0.0028451576, -0.27325574, 0.14329389, -0.07025869, -0.09781529, 0.0151023185, 0.08696752, -0.056844577, -0.19665222, 0.09358589, 0.16416575, 0.06988374, 0.16515698, 0.09760437, 0.023626767, 0.16473217));
	conv2d_10_tf += mul(a2, min16float4x4(-0.01080354, 0.014449004, 0.11467091, -0.07119837, 0.18900962, -0.06401898, -0.025841001, 0.13663737, -0.04860565, 0.15505394, 0.11083383, -0.06831929, -0.12395706, 0.04564376, -0.132784, 0.095948376));
	conv2d_10_tf += mul(b2, min16float4x4(-0.009644828, 0.05351468, -0.086626254, -0.07883177, 0.12082235, 0.16186416, 0.20026602, -0.12537873, -0.02765183, -0.19664048, -0.14943156, 0.17649364, -0.15099925, -0.16448402, 0.04770359, 0.08525748));
	conv2d_10_tf += mul(c2, min16float4x4(-0.07529481, 0.057762332, 0.02256763, 0.0037007954, 0.052606575, 0.008619477, 0.035252705, -0.060551647, 0.03680644, 0.1457205, 0.0970469, 0.00867666, -0.0931654, -0.046189044, -0.118787736, 0.059376143));
	conv2d_10_tf += mul(d2, min16float4x4(0.024567254, -0.07128407, -0.02618071, -0.16522972, 0.02537496, 0.09393943, -0.018046979, -0.12497053, 0.041589152, 0.028847594, 0.072174646, -0.12484334, -0.096903354, 0.07245438, -0.03219862, 0.037360255));
	conv2d_10_tf += mul(e2, min16float4x4(0.05599119, -0.0027604182, -0.004961665, -0.1297362, 0.10879746, 0.14088875, -0.031004267, -0.016735828, 0.07093551, 0.024946349, 0.16840066, -0.10094298, -0.04150052, 0.09933387, 0.09332617, -0.121228844));
	conv2d_10_tf += mul(f2, min16float4x4(0.099246845, -0.17000747, -0.17089754, 0.0021521626, 0.046584304, -0.037944607, 0.1009471, 0.110904016, 0.17920195, -0.00022254961, 0.07443117, 0.07490046, 0.1700909, -0.18371364, -0.15320961, -0.0344897));
	conv2d_10_tf += mul(g2, min16float4x4(0.10543544, 0.04469465, 0.14627467, -0.07649682, -0.082381524, 0.12919065, 0.090079635, -0.07820535, -0.06769879, -0.12625079, -0.06946243, -0.19333136, 0.02998107, 0.01594043, 0.12332583, 0.015775004));
	conv2d_10_tf += mul(h2, min16float4x4(0.025815854, 0.015107419, -0.045278236, 0.13242702, -0.059958965, 0.031560495, 0.047686167, 0.064922616, 0.09818797, -0.07938157, -0.08586279, 0.079509474, -0.031728156, 0.052335043, 0.046583798, 0.17072229));
	conv2d_10_tf += mul(i2, min16float4x4(-0.07827454, -0.033509843, 0.054832056, -0.011652403, -0.029872715, -0.13623856, 0.013034195, -0.009600983, -0.08374398, 0.0022505643, 0.042340405, 0.050227124, -0.072084844, -0.044353593, 0.06991293, -0.024949703));
	conv2d_10_tf += mul(a3, min16float4x4(0.08938938, -0.092218116, -0.016011834, 0.038319822, 0.12462916, 0.30430344, -0.2225195, 0.23016618, 0.16917962, -0.10025298, 0.03197825, -0.0028935818, -0.20949106, 0.16084236, 0.02389285, -0.07628905));
	conv2d_10_tf += mul(b3, min16float4x4(0.008811933, -0.07407284, 0.06164061, -0.08511243, 0.23705618, -0.04852394, -0.09615244, -0.14999956, 0.14771207, -0.31061637, 0.053693004, 0.12648372, 0.13281338, -0.052495755, -0.10527891, 0.055210527));
	conv2d_10_tf += mul(c3, min16float4x4(-0.002706158, -0.08600029, 0.067195736, 0.11638961, 0.22492133, 0.21856707, -0.07640264, -0.06916772, 0.06080084, 0.11333604, 0.06812178, -0.033994764, 0.18698989, -0.0062931813, -0.07839693, -0.19759217));
	conv2d_10_tf += mul(d3, min16float4x4(0.016470285, -0.08823432, 0.22680223, 0.09997554, 0.23114151, 0.19813643, -0.35361916, 0.2194339, 0.11047473, 0.068083756, 0.067214124, 0.43412095, -0.012517998, 0.15817562, 0.041793827, -0.12873247));
	conv2d_10_tf += mul(e3, min16float4x4(0.072530076, 0.13730067, 0.2244758, -0.07199118, -0.052385315, 0.10464238, 0.26556495, -0.2717685, -0.11540168, -0.018752037, 0.025696546, -0.12900795, -0.010386023, -0.020768933, 0.24903738, -0.14111607));
	conv2d_10_tf += mul(f3, min16float4x4(-0.24632111, -0.015176092, -0.02656606, 0.009465184, -0.0051622107, 0.14365524, 0.110313326, 0.075529456, -0.041912608, -0.012926297, 0.099115536, -0.043660834, 0.14709431, 0.069978856, 0.19860862, 0.30215213));
	conv2d_10_tf += mul(g3, min16float4x4(0.003388868, 0.000683922, 0.025133248, 0.004995937, -0.06642034, 0.028584523, -0.14691937, -0.2014579, 0.15427552, -0.027058927, 0.04456965, 0.084938034, -0.24065961, -0.014348999, -0.093859546, -0.032467082));
	conv2d_10_tf += mul(h3, min16float4x4(-0.067999065, -0.061825316, -0.056987073, 0.0009880592, -0.014163033, -0.30605268, 0.22628185, 0.01192761, -0.08495571, 0.17559315, -0.17546391, -0.0027795131, -0.289151, -0.41655365, 0.11138813, -0.18327911));
	conv2d_10_tf += mul(i3, min16float4x4(-0.032702215, 0.072819114, -0.06573772, -0.023648093, -0.28138083, 0.0492584, 0.17402509, -0.04257587, 0.109756455, 0.086533375, -0.017961387, 0.02175586, -0.12014975, 0.0101643065, 0.34295502, -0.04737776));
	conv2d_10_tf += mul(na1, min16float4x4(-0.043654937, 0.030818325, 0.009349365, 0.0058960635, 0.075968295, 0.10992966, -0.056467474, -0.053309787, -0.020969287, 0.13869311, 0.118167736, 0.20124547, -0.071703844, 0.16065824, 0.0333816, 0.16069882));
	conv2d_10_tf += mul(nb1, min16float4x4(-0.00913058, 0.11581215, -0.08088577, 0.048499383, -0.002100561, 0.14013395, -0.021854091, 0.022357881, -0.007194664, 0.2258521, 0.28041685, 0.035750967, -0.17555529, -0.06302401, 0.006144002, 0.073763065));
	conv2d_10_tf += mul(nc1, min16float4x4(0.13105561, 0.033134516, -0.123544686, 0.036164157, 0.081316054, -0.09048299, -0.034898795, -0.04975392, -0.118228555, 0.0013148085, -0.024866905, -0.07593515, -0.058713235, 0.081549294, 0.09502267, -0.06489622));
	conv2d_10_tf += mul(nd1, min16float4x4(-0.013302538, 0.14520672, -0.041146558, 0.08169293, 0.1506187, 0.062507726, 0.19582897, 0.05240332, 0.015582799, 0.08783006, 0.016972601, -0.23824452, -0.056192238, -0.087197326, 0.0045260703, -0.012997719));
	conv2d_10_tf += mul(ne1, min16float4x4(-0.074937195, -0.018988643, -0.07370074, 0.048774365, 0.07236563, 0.0904083, -0.10467449, 0.10507359, 0.12723474, -0.1263123, -0.17705469, -0.15779553, -0.23850663, -0.119912334, 0.21794695, 0.19370297));
	conv2d_10_tf += mul(nf1, min16float4x4(-0.04097957, -0.0038975494, 0.11273524, -0.049562607, -0.041399803, 0.013795214, -0.07912852, 0.06913985, -0.039762158, 0.031136844, -0.22443683, -0.07978295, 0.15926225, -0.021239735, 0.02987538, 0.0073201153));
	conv2d_10_tf += mul(ng1, min16float4x4(-0.00022499492, 0.07021377, 0.10080298, -0.049646243, 0.08742822, -0.05083212, 0.11067444, 0.0028296155, -0.06948983, -0.032108277, -0.17148562, 0.031176677, 0.028853005, 0.06482861, 0.0068417406, 0.20317557));
	conv2d_10_tf += mul(nh1, min16float4x4(0.11648821, -0.17146581, 0.067954056, 0.08905258, -0.08075704, 0.019719714, -0.11522013, 0.07268729, 0.0639498, 0.19816676, 0.014075983, -0.032495353, -0.017302783, 0.001971279, -0.03852454, 0.13213885));
	conv2d_10_tf += mul(ni1, min16float4x4(-0.043073803, 0.013491542, -0.0071037943, 0.104073495, 0.02311169, 0.058454588, -0.036697295, -0.048574958, -0.02161516, 0.10554709, 0.07252144, 0.013570617, -0.08058747, -0.050845098, 0.11659161, 0.12994757));
	conv2d_10_tf += mul(na2, min16float4x4(-0.065163076, 0.19974495, -0.4120684, 0.07145881, 0.113002166, 0.23591681, 0.09600776, -0.12980238, -0.032298863, -0.09617708, -0.09807077, -0.019956803, -0.0144692, -0.11556348, -0.080140986, -0.088292986));
	conv2d_10_tf += mul(nb2, min16float4x4(-0.012835261, -0.04646276, 0.072318554, -0.08490823, 0.1648558, -0.15578964, 0.07145768, 0.12143512, 0.007787767, 0.07922046, -0.10203864, -0.15637778, 0.17195338, -0.16184372, -0.01940918, -0.0037627215));
	conv2d_10_tf += mul(nc2, min16float4x4(-0.118128635, -0.06761304, 0.20045926, -0.11828058, 0.022446023, -0.09117082, 0.11077834, 0.12605691, -0.094919816, -0.016070768, -0.025274863, 0.13070245, 0.14234897, -0.080053166, -0.14352201, 0.24688406));
	conv2d_10_tf += mul(nd2, min16float4x4(-0.038446598, 0.06076558, 0.011793446, -0.027539631, 0.12532312, 0.12770405, 0.05115926, 0.07202868, 0.00048553053, -0.20094085, 0.14294891, 0.27486032, 0.09690127, -0.19488129, -0.010087613, -0.32277402));
	conv2d_10_tf += mul(ne2, min16float4x4(-0.03640304, -0.03347442, -0.14699876, 0.084367014, -0.0931957, 0.0046109143, -0.10012045, -0.21788213, -0.22289619, -0.15080798, 0.053079627, 0.058909237, 0.0033036254, -0.266638, 0.15794982, 0.15606833));
	conv2d_10_tf += mul(nf2, min16float4x4(-0.16570765, -0.19292961, -0.040884703, 0.0350054, 0.044223823, -0.05094823, -0.10369617, -0.026184212, -0.07026344, 0.08071905, -0.05532503, -0.105882615, 0.11906692, -0.12926123, 0.18500324, 0.09285109));
	conv2d_10_tf += mul(ng2, min16float4x4(-0.30376035, -0.015966324, -0.080935225, -0.054857124, 0.008181847, -0.051866602, 0.086870745, -0.205586, -0.13184556, -0.03217006, 0.029946566, -0.10589564, 0.045322973, -0.1656244, -0.08579307, -0.121582575));
	conv2d_10_tf += mul(nh2, min16float4x4(-0.06772616, -0.14879958, -0.17823575, 0.020676576, -0.04157187, -0.019993478, -0.026832247, -0.22187601, -0.12282354, -0.101527624, 0.10540906, -0.09816911, 0.01171376, -0.35307917, -0.21599512, -0.12673624));
	conv2d_10_tf += mul(ni2, min16float4x4(0.13506149, -0.12476234, -0.23067783, 0.0016245812, 0.27068454, 0.085986294, 0.08674341, 0.07736311, 0.04183122, 0.09630597, 0.005955931, -0.033355173, -0.19212, -0.2707448, -0.18517534, -0.035879433));
	conv2d_10_tf += mul(na3, min16float4x4(-0.0151614295, 0.047397353, 0.0923022, 0.08485078, 0.15618569, -0.11042138, 0.12418296, -0.07967247, 0.053651772, 0.015027734, 0.048835948, 0.07711154, 0.020557769, 0.023958597, 0.04587901, -0.0014006038));
	conv2d_10_tf += mul(nb3, min16float4x4(0.038551513, -0.10045045, 0.06231501, 0.043190606, 0.011727592, 0.10791629, 0.022111481, -0.053163722, 0.11845128, -0.102105886, 0.08789077, -0.0027942352, -0.08893058, 0.008466707, 0.011015023, -0.047280762));
	conv2d_10_tf += mul(nc3, min16float4x4(0.013820725, 0.1256963, 0.041195784, -0.057415746, -0.07633132, -0.025274424, 0.029755162, -0.046797376, -0.037444938, -0.09385259, 0.14993298, 0.040402364, 0.057619866, 0.0044342144, 0.044209216, 0.13005155));
	conv2d_10_tf += mul(nd3, min16float4x4(0.07646884, 0.18639803, -0.021711063, 0.021434348, 0.11517055, 0.010340496, -0.0018932755, -0.3739696, 0.1309672, 0.08240308, 0.08870368, 0.09622062, -0.07567563, -0.08575518, 0.12712875, 0.16571298));
	conv2d_10_tf += mul(ne3, min16float4x4(-0.028878238, -0.06821328, -0.048233025, 0.010556409, 0.08252249, 0.12659778, 0.10306397, 0.041443437, -0.008534995, -0.08196783, -0.13689299, 0.048229158, 0.12889823, 0.12517701, -0.06344265, 0.11288182));
	conv2d_10_tf += mul(nf3, min16float4x4(0.20085302, 0.024324976, 0.012985146, 0.045487225, -0.14292689, 0.091915675, 0.030304266, -0.007919423, -0.09057523, -0.13942213, 0.22375956, -0.15821122, 0.13392857, 0.06950518, -0.009899817, -0.19455001));
	conv2d_10_tf += mul(ng3, min16float4x4(-0.18937646, 0.13056205, -0.09389302, -0.06861626, 0.030355467, -0.07237441, 0.079272114, -0.018099891, -0.057733692, 0.14460595, -0.068894215, 0.073404275, -0.005731954, -0.16851021, 0.029365558, 0.04029561));
	conv2d_10_tf += mul(nh3, min16float4x4(-0.11247864, -0.026352342, -0.26439467, 0.021711655, -0.17112786, 0.09201832, 0.058435153, -0.18282679, -0.058647767, -0.0882594, -0.09513095, 0.046603747, 0.118426494, -0.06860188, 0.14646193, -0.10118678));
	conv2d_10_tf += mul(ni3, min16float4x4(-0.08203177, 0.049650684, 0.11541628, 0.07473622, -0.06572682, -0.018375592, -0.0739239, -0.08190655, -0.012673694, 0.0003337712, 0.041397918, -0.047579113, -0.13510825, 0.025625594, -0.035801806, -0.045355853));
	conv2d_10_tf += min16float4(0.03802586, 0.06033134, 0.0405485, 0.00039835402);
	tex11[gxy] = conv2d_10_tf;
	min16float4 nconv2d_10_tf = max(-conv2d_10_tf, 0);
	conv2d_10_tf = max(conv2d_10_tf, 0);

	min16float4 target = mul(e1, min16float4x4(0.2216899, -0.006199309, -0.14865121, 0.06256912, 0.082141966, 0.069441915, -0.064958416, -0.014999604, -0.017270254, 0.054063573, -0.30066323, 0.09460075, 0.17069338, -0.26000282, 0.026078973, -0.0024098607));
	target += mul(e2, min16float4x4(0.22918217, 0.2753827, -0.2260137, 0.0074888375, 0.007864308, 0.01738929, 0.036404576, 0.15125586, 0.12692557, -0.1064573, -0.105954304, 0.17095445, -0.295937, 0.2284073, -0.28089303, 0.17836742));
	target += mul(e3, min16float4x4(-0.23949356, -0.20830329, 0.043005105, 0.11848222, 0.26292896, 0.13052817, 0.14105777, -0.14028162, 0.033770017, -0.12098709, -0.19063175, -0.020637099, 0.032703582, -0.31454226, 0.07559202, 0.067997165));
	target += mul(ne1, min16float4x4(-0.26934767, 0.25418487, 0.2089665, -0.15689164, 0.068669625, -0.19087234, 0.034052055, -0.038685646, 0.037284948, 0.14673525, -0.001882231, 0.07179596, -0.054052413, 0.2954734, 0.108455196, 0.21742904));
	target += mul(ne2, min16float4x4(0.24180835, 0.012385412, -0.017178789, 0.032714315, -0.26524556, 0.024244266, -0.226589, -0.0358992, -0.2241718, 0.08004254, -0.017615836, -0.2492002, 0.09387765, 0.18154638, -0.034240507, 0.3605678));
	target += mul(ne3, min16float4x4(0.24151021, -0.014141217, -0.1259467, -0.19366209, -0.07166293, 0.08856931, -0.08999051, 0.31848234, -0.07388433, -0.16038652, 0.28902727, 0.2382835, -0.15296587, -0.12924191, 0.16233487, 0.05408346));
	target += mul(conv2d_11_tf, min16float4x4(-0.18532315, 0.116318375, -0.043276392, -0.20643523, -0.1317004, -0.025412546, -0.32449946, 0.08039049, -0.18457016, -0.015615943, -0.01645252, 0.21732457, 0.082662076, 0.1900878, -0.11705433, 0.14767131));
	target += mul(nconv2d_11_tf, min16float4x4(0.052993804, -0.11595191, 0.32436988, -0.003765943, 0.2296748, 0.119828835, -0.019125028, -0.3126433, -0.039699726, -0.24760635, 0.08949547, -0.012501165, 0.33296522, -0.349697, -0.081094205, 0.061596226));
	target += mul(conv2d_1_tf, min16float4x4(-0.033869196, 0.12660468, 0.12152309, -0.18401411, 0.1442463, 0.18430543, 0.22487932, 0.29795903, 0.17951487, -0.24413475, -0.13472381, 0.3147198, -0.22021247, -0.15316834, 0.013162168, -0.20238425));
	target += mul(nconv2d_1_tf, min16float4x4(-0.0015613904, -0.09523476, 0.024224702, -0.17930624, -0.061623972, 0.06495367, 0.3776854, -0.17299566, -0.36212873, 0.13202415, 0.07052771, -0.1219512, 0.29942214, -0.011110212, 0.36104754, 0.0010065075));
	target += mul(conv2d_4_tf, min16float4x4(0.16467105, 0.29388088, 0.13385788, 0.118168965, 0.15695275, -0.2269201, 0.097460486, -0.04286567, 0.020316202, -0.07753041, -0.18018067, -0.111885116, -0.17371373, 0.04722513, 0.2188871, 0.1295067));
	target += mul(nconv2d_4_tf, min16float4x4(0.2567296, 0.0027146419, -0.18108767, -0.10636566, -0.04075492, 0.08977396, 0.27601838, 0.041642547, -0.29131287, -0.0026349663, 0.16847563, 0.29684088, 0.23944439, -0.12667872, -0.31902757, -0.023768846));
	target += mul(conv2d_7_tf, min16float4x4(-0.12111429, 0.046077378, 0.07920395, -0.3619861, 0.0030046673, -0.21324079, -0.14134064, 0.07692796, 0.2308601, 0.050601542, -0.20067136, 0.1312576, 0.078878105, -0.07905382, 0.04887801, 0.11589316));
	target += mul(nconv2d_7_tf, min16float4x4(0.18035689, 0.022012187, -0.05441432, -0.13895841, 0.1792498, 0.06579118, -0.3518265, 0.19284686, -0.36724597, -0.19384578, 0.052024953, 0.069351286, -0.17106277, 0.01428955, -0.022695465, -0.03882866));
	target += mul(conv2d_10_tf, min16float4x4(0.12341931, 0.21374431, 0.14095145, 0.11081035, -0.1377048, 0.2957615, 0.2647214, -0.21324296, 0.18657272, -0.16867872, 0.13558641, -0.14022234, -0.00384067, -0.19601567, -0.20603377, 0.006892211));
	target += mul(nconv2d_10_tf, min16float4x4(0.05891213, 0.17766091, -0.11099863, -0.10597074, 0.4759035, -0.20892517, -0.35479382, -0.057822235, -0.10161365, -0.11828349, -0.021581944, 0.057930104, -0.46801752, -0.25330284, 0.30126703, -0.31744412));
	target += min16float4(0.011156243, 0.004168819, 0.082229175, 0.043994825);
	tex1[gxy] = target;
	
	target = mul(e1, min16float4x4(0.137003, -0.06089221, -0.108805895, 0.27130327, -0.3015222, -0.26373127, 0.019133324, 0.035202216, 0.040255867, 0.09030984, -0.46218738, -0.3097094, -0.057662863, 0.123317555, 0.037645355, 0.010423522));
	target += mul(e2, min16float4x4(0.29102653, -0.17060617, 0.31592718, -0.15487169, -0.09719322, 0.08212171, -0.24112037, -0.5323616, 0.050776903, 0.26745227, -0.0123307025, -0.0076298076, -0.044822518, -0.15961778, 0.26758936, 0.019300641));
	target += mul(e3, min16float4x4(0.19517086, -0.2878986, 0.12765801, -0.12057966, 0.27521843, 0.028182628, 0.32267106, 0.035355434, -0.065272234, -0.015919037, 0.38220987, 0.14314096, 0.052418232, 0.07207548, -0.41493666, -0.03195114));
	target += mul(ne1, min16float4x4(0.18309553, -0.11183888, -0.052814357, -0.08971906, -0.14353213, -0.20144752, -0.20325397, -0.16143575, 0.028960846, -0.16557908, 0.266044, -0.2373641, 0.12750591, -0.11190832, 0.35028338, 0.17638433));
	target += mul(ne2, min16float4x4(0.058721025, 0.21000905, -0.2719825, -0.16923684, 0.2887994, 0.08877727, -0.1274528, 0.12557751, -0.09804875, -0.37839252, -0.1465434, -0.1059692, 0.07212408, -0.101579584, -0.16375211, -0.09519384));
	target += mul(ne3, min16float4x4(-0.145749, -0.15073515, -0.2661711, -0.21265043, -0.3345085, -0.16820145, 0.07732321, 0.13837157, 0.014605319, -0.14113256, -0.3269443, -0.100293055, 0.114504874, -0.4271041, -0.17389913, 0.0033216716));
	target += mul(conv2d_11_tf, min16float4x4(0.022264633, -0.19477129, 0.050657783, -0.08318149, -0.5125155, 0.030831251, 0.110084355, -0.25779435, 0.08368584, 0.48425493, -0.28335044, 0.23433922, 0.31263804, -0.12789254, -0.14072786, 0.10106589));
	target += mul(nconv2d_11_tf, min16float4x4(0.007650675, -0.082783565, -0.1599306, 0.22329025, -0.01190027, 0.09498623, -0.06526687, -0.074669816, 0.13880949, -0.0060707824, -0.044009406, 0.15161307, -0.121638715, 0.012903123, 0.047266923, -0.41495043));
	target += mul(conv2d_1_tf, min16float4x4(0.1315474, 0.2878135, -0.03521026, 0.31479505, 0.4425801, 0.22921802, -0.19864602, -0.0049938424, -0.39346734, 0.09232505, 0.20387846, 0.08173493, -0.2582244, -0.23351125, 0.04481434, -0.105453715));
	target += mul(nconv2d_1_tf, min16float4x4(-0.10668876, -0.026544912, 0.19446668, 0.0045490777, -0.024656052, -0.11874863, 0.21377616, 0.16957945, 0.36561254, -0.19234993, -0.16987774, 0.05442733, -0.13925838, -0.09912278, -0.06849117, 0.2862709));
	target += mul(conv2d_4_tf, min16float4x4(0.33045495, -0.13048914, -0.023560356, -0.21611182, 0.031752963, 0.14722162, -0.18900181, -0.214494, -0.014231522, 0.23605579, 0.04047805, 0.4060913, -0.13969432, -0.20286381, -0.29891747, -0.043839972));
	target += mul(nconv2d_4_tf, min16float4x4(0.12433207, 0.20156589, -0.16986352, 0.07386095, -0.08681933, -0.055620465, -0.043641977, 0.25392216, -0.19010517, -0.018021587, -0.040169913, 0.3845108, -0.18094495, -0.07285529, 0.1848976, -0.24628341));
	target += mul(conv2d_7_tf, min16float4x4(-0.038218584, 0.1562106, -0.14935517, 0.14979756, -0.24085392, -0.32680586, -0.015209841, 0.31288582, 0.15819284, -0.084411524, -0.18117775, 0.16964395, 0.29338664, -0.020204993, 0.011733066, -0.03798886));
	target += mul(nconv2d_7_tf, min16float4x4(-0.020065956, -0.043856975, 0.016091857, 0.19466555, 0.16528654, 0.049655683, -0.3676622, -0.14080617, -0.094320625, 0.27908608, -0.084430434, -0.07656003, 0.19461128, 0.11947404, -0.05046522, -0.12625407));
	target += mul(conv2d_10_tf, min16float4x4(-0.013265381, -0.015804514, -0.12068759, -0.06364535, -0.040848896, -0.07602193, -0.04744431, 0.29088646, 0.1358165, 0.010972456, -0.04270195, -0.091147564, -0.2690454, 0.23030208, -0.39135924, -0.22463588));
	target += mul(nconv2d_10_tf, min16float4x4(0.20590256, 0.098045684, 0.3285928, 0.04094028, 0.12415101, 0.244203, 0.048238404, 0.17298737, 0.22513592, 0.048016686, -0.11171281, 0.12644528, -0.40468216, -0.02186692, -0.09637657, -0.20869099));
	target += min16float4(-0.01212462, -0.018702446, -0.0063916473, -0.015887083);
	tex2[gxy] = target;
	
	target = mul(e1, min16float4x4(0.06816948, 0.34817252, -0.046539452, 0.0051957658, -0.1393289, -0.123660676, -0.28295487, -0.09683893, -0.3166085, 0.112649016, 0.016630042, 0.12213537, 0.048850413, 0.10865108, 0.36645818, -0.1570077));
	target += mul(e2, min16float4x4(0.16992034, 0.15695556, 0.23111318, -0.07952356, 0.008467285, -0.11592582, -0.18852152, 0.11257074, 0.24210866, 0.1062648, -0.101493195, 0.04611632, -0.13289067, -0.07632904, 0.012860103, -0.08678244));
	target += mul(e3, min16float4x4(0.19332299, -0.06392618, -0.18013911, 0.23211008, -0.0025107847, 0.4468814, -0.15807462, -0.27148855, 0.24238719, 0.16024797, -0.22240195, 0.2425211, 0.008685379, -0.43995225, 0.28782377, -0.04508348));
	target += mul(ne1, min16float4x4(-0.038411126, -0.0034189979, -0.10616163, -0.22397435, 0.005768774, 0.13181472, 0.091235116, 0.07068676, 0.08932033, 0.025967117, -0.053367026, -0.22340903, -0.13413511, 0.24192514, -0.011392121, -0.09885669));
	target += mul(ne2, min16float4x4(-0.13691483, 0.058308467, 0.14866434, 0.005773672, -0.16254735, -0.03150588, 0.16304344, 0.31798756, -0.22399272, 0.033883456, -0.09658691, -0.12437203, -0.117079385, 0.21686973, -0.037619635, -0.085622996));
	target += mul(ne3, min16float4x4(-0.24666454, -0.06097481, -0.08042751, -0.09151835, -0.09213628, 0.06706758, -0.12596707, 0.05328458, 0.25016794, -0.21868211, 0.22890028, -0.16557315, 0.036212686, 0.13603954, -0.20226133, -0.22868301));
	target += mul(conv2d_11_tf, min16float4x4(0.022882584, -0.023618432, 0.08065757, 0.33173925, 0.07162631, -0.010860303, 0.15222527, -0.21064946, 0.023574507, 0.06347729, -0.2955436, 0.31633475, -0.3643237, -0.087610714, -0.089636534, 0.13809934));
	target += mul(nconv2d_11_tf, min16float4x4(-0.22458415, -0.01961852, -0.014363966, -0.2820657, -0.20567393, 0.106780864, -0.43547606, 0.3259588, 0.42431846, -0.30789465, -0.053756483, 0.18392731, -0.43784657, 0.23359884, 0.25319567, -0.1464313));
	target += mul(conv2d_1_tf, min16float4x4(0.06667747, 0.011182004, 0.26176485, -0.15575507, -0.017922953, 0.0014675539, -0.13763407, -0.086996995, -0.00082739035, 0.03939667, -0.09286956, 0.29952076, 0.014103506, 0.10058367, 0.16165632, 0.23478027));
	target += mul(nconv2d_1_tf, min16float4x4(-0.1966405, 0.11404606, -0.12005759, -0.22895505, -0.0848272, 0.021871557, 0.044186037, -0.111861885, -0.16986093, -0.24633476, 0.07282808, -0.26975635, 0.34241816, 0.030470898, -0.09903839, -0.22579415));
	target += mul(conv2d_4_tf, min16float4x4(0.10059369, 0.010142443, 0.061046213, 0.6807189, 0.005402132, -0.21700516, 0.16900781, -0.09973772, -0.025505878, 0.14216411, 0.14366129, -0.02743741, 0.09240224, 0.055595424, -0.22342968, 0.32391673));
	target += mul(nconv2d_4_tf, min16float4x4(-0.24940865, -0.042881966, -0.19815244, -0.05011009, 0.32227826, 0.07563262, -0.22649106, 0.10700333, -0.14117172, 0.1359497, -0.14451554, 0.34859756, 0.060239617, 0.09917812, 0.13169186, 0.077682465));
	target += mul(conv2d_7_tf, min16float4x4(-0.0714192, 0.12607583, -0.3341241, 0.18375745, -0.18943295, 0.11634349, 0.06633747, -0.13485552, 0.045528308, 0.2432545, 0.26417813, 0.0074096527, 0.004411052, -0.5647283, 0.021793056, -0.1910634));
	target += mul(nconv2d_7_tf, min16float4x4(0.04678379, 0.15781826, -0.14137928, -0.065010436, 0.1379615, -0.07252597, -0.05457498, 0.049137864, 0.054244712, -0.24069838, -0.11444052, 0.27642834, 0.19889133, 0.31845504, -0.102143094, 0.088378325));
	target += mul(conv2d_10_tf, min16float4x4(-0.1163185, 0.19226453, -0.1896929, -0.30681732, -0.013604632, -0.12468549, 0.018667353, 0.09807849, 0.030277459, 0.18578297, 0.14520812, 0.43598676, 0.24981564, 0.22188906, -0.12707953, 0.35956743));
	target += mul(nconv2d_10_tf, min16float4x4(-0.1817424, 0.27081814, -0.16284765, 0.033412658, -0.29831278, -0.1345311, 0.27491164, 0.14552177, -0.054520354, -0.2996891, -0.1279112, -0.64904505, 0.049450837, -0.021562194, -0.6366078, 0.15545636));
	target += min16float4(0.019361967, -0.009793055, 0.03647491, -0.010136049);
	tex3[gxy] = target;
}

//!PASS 6
//!DESC Conv-4x3x3x24, Conv-4x1x1x72
//!IN tex1, tex2, tex3, tex10, tex4, tex8, tex9, tex11
//!OUT tex5, tex6, tex7
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass6(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	uint2 inputSize = GetInputSize();
	if (gxy.x >= inputSize.x || gxy.y >= inputSize.y) {
		return;
	}

	float2 inputPt = GetInputPt();
	float2 pos = (gxy + 0.5f) * inputPt;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	min16float4 a1 = tex1.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c1 = tex1.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d1 = tex1.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e1 = tex1.SampleLevel(sam, pos, 0);
	min16float4 f1 = tex1.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h1 = tex1.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i1 = tex1.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na1 = max(-a1, 0);
	min16float4 nb1 = max(-b1, 0);
	min16float4 nc1 = max(-c1, 0);
	min16float4 nd1 = max(-d1, 0);
	min16float4 ne1 = max(-e1, 0);
	min16float4 nf1 = max(-f1, 0);
	min16float4 ng1 = max(-g1, 0);
	min16float4 nh1 = max(-h1, 0);
	min16float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);
	
	min16float4 a2 = tex2.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c2 = tex2.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d2 = tex2.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e2 = tex2.SampleLevel(sam, pos, 0);
	min16float4 f2 = tex2.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h2 = tex2.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i2 = tex2.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na2 = max(-a2, 0);
	min16float4 nb2 = max(-b2, 0);
	min16float4 nc2 = max(-c2, 0);
	min16float4 nd2 = max(-d2, 0);
	min16float4 ne2 = max(-e2, 0);
	min16float4 nf2 = max(-f2, 0);
	min16float4 ng2 = max(-g2, 0);
	min16float4 nh2 = max(-h2, 0);
	min16float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);
	
	min16float4 a3 = tex3.SampleLevel(sam, pos - inputPt, 0);
	min16float4 b3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, 0), 0);
	min16float4 c3 = tex3.SampleLevel(sam, pos + float2(-inputPt.x, inputPt.y), 0);
	min16float4 d3 = tex3.SampleLevel(sam, pos + float2(0, -inputPt.y), 0);
	min16float4 e3 = tex3.SampleLevel(sam, pos, 0);
	min16float4 f3 = tex3.SampleLevel(sam, pos + float2(0, inputPt.y), 0);
	min16float4 g3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, -inputPt.y), 0);
	min16float4 h3 = tex3.SampleLevel(sam, pos + float2(inputPt.x, 0), 0);
	min16float4 i3 = tex3.SampleLevel(sam, pos + inputPt, 0);

	min16float4 na3 = max(-a3, 0);
	min16float4 nb3 = max(-b3, 0);
	min16float4 nc3 = max(-c3, 0);
	min16float4 nd3 = max(-d3, 0);
	min16float4 ne3 = max(-e3, 0);
	min16float4 nf3 = max(-f3, 0);
	min16float4 ng3 = max(-g3, 0);
	min16float4 nh3 = max(-h3, 0);
	min16float4 ni3 = max(-i3, 0);

	a3 = max(a3, 0);
	b3 = max(b3, 0);
	c3 = max(c3, 0);
	d3 = max(d3, 0);
	e3 = max(e3, 0);
	f3 = max(f3, 0);
	g3 = max(g3, 0);
	h3 = max(h3, 0);
	i3 = max(i3, 0);
	
	min16float4 conv2d_11_tf = tex10.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_11_tf = max(-conv2d_11_tf, 0);
	conv2d_11_tf = max(conv2d_11_tf, 0);
	
	min16float4 conv2d_1_tf = tex4.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_1_tf = max(-conv2d_1_tf, 0);
	conv2d_1_tf = max(conv2d_1_tf, 0);
	
	min16float4 conv2d_4_tf = tex8.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_4_tf = max(-conv2d_4_tf, 0);
	conv2d_4_tf = max(conv2d_4_tf, 0);
	
	min16float4 conv2d_7_tf = tex9.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_7_tf = max(-conv2d_7_tf, 0);
	conv2d_7_tf = max(conv2d_7_tf, 0);
	
	min16float4 conv2d_10_tf = tex11.SampleLevel(sam, pos, 0);
	min16float4 nconv2d_10_tf = max(-conv2d_10_tf, 0);
	conv2d_10_tf = max(conv2d_10_tf, 0);

	min16float4 conv2d_13_tf = mul(a1, min16float4x4(0.18203236, 0.08024887, -0.036568414, 0.13316368, -0.04578262, -0.06611782, -0.006320991, 0.19218548, 0.21009454, -0.08195536, 0.02459481, 0.037244156, -0.09028578, 0.025431598, 0.118399516, -0.11004066));
	conv2d_13_tf += mul(b1, min16float4x4(-0.057694096, 0.016725041, 0.09517554, -0.063389495, -0.15092854, -0.17499524, -0.023841592, -0.0021040211, -0.15481879, 0.058758404, -0.11097904, -0.026721174, 0.0025346193, 0.05679149, 0.0072498247, -0.13605994));
	conv2d_13_tf += mul(c1, min16float4x4(-0.029078262, 0.009836967, 0.07132015, 0.06620542, -0.21533649, 0.13504961, -0.026253965, 0.15687846, -0.041120164, -0.11824143, -0.03505001, -0.069189556, -0.020444538, -0.040636882, 0.104192354, 0.02525567));
	conv2d_13_tf += mul(d1, min16float4x4(-0.0033649271, 0.11870954, -0.29426005, 0.06678275, -0.21843383, -0.06478074, 0.055388454, 0.03360907, 0.15619075, 0.03552764, -0.004492958, -0.11098848, 0.0945473, -0.12231036, 0.060498584, -0.017200515));
	conv2d_13_tf += mul(e1, min16float4x4(0.11771511, 0.19074214, 0.2556847, -0.00011035888, -0.06266651, -0.18949944, -0.08458407, -0.4617736, 0.049839392, 0.0028800126, 0.112179466, -0.23129073, 0.07304365, 0.08169297, 0.010470617, -0.10990468));
	conv2d_13_tf += mul(f1, min16float4x4(0.2365061, -0.011560716, 0.040669534, 0.05682574, -0.108832434, 0.108204864, -0.016406072, -0.35809964, 0.19385669, 0.011194286, 0.022534747, -0.18770002, 0.040628985, -0.106064685, -0.12965748, -0.11940811));
	conv2d_13_tf += mul(g1, min16float4x4(-0.07578536, 0.055741407, -0.07985701, -0.08520933, -0.119309366, -0.001639899, -0.045735355, 0.060324576, -0.04602573, -0.067629695, -0.024497611, 0.11022731, 0.0866483, 0.023369456, 0.092412636, -0.15647933));
	conv2d_13_tf += mul(h1, min16float4x4(0.1151201, -0.06150153, -0.007215896, -0.027420595, -0.27713504, -0.30414173, -0.25002155, 0.04579516, 0.07746921, -0.039378557, -0.0007037489, 0.05171079, 0.04729991, -0.24362347, -0.03316277, -0.029248973));
	conv2d_13_tf += mul(i1, min16float4x4(0.08882578, -0.089215234, -0.027231896, 0.09565371, -0.040585488, -0.0666667, -0.10971792, -0.18664278, 0.08676577, 0.011609924, -0.11474831, -0.0032087977, -0.14733344, -0.013885521, -0.0600112, -0.028661741));
	conv2d_13_tf += mul(a2, min16float4x4(-0.16421804, 0.13640842, -0.053869005, -0.105430946, 0.33498198, -0.19186987, -0.044760693, 0.12338264, 0.04087762, 0.35624924, 0.16211961, -0.16837841, -0.21358813, 0.07136877, 0.09507147, 0.15890902));
	conv2d_13_tf += mul(b2, min16float4x4(-0.3021354, -0.3319794, 0.070228204, 0.1157857, -0.23864768, -0.124694765, -0.035166927, -0.2196196, 0.11144565, 0.15449396, 0.31777796, 0.23201036, 0.36269313, 0.0791044, -0.14027423, -0.10298774));
	conv2d_13_tf += mul(c2, min16float4x4(0.0045441133, 0.14908041, -0.04037237, 0.012396483, -0.41045487, -0.049013153, -0.25163352, -0.18674599, -0.020136787, -0.04309944, 0.16324212, 0.20724443, 0.0013537789, 0.10984782, -0.050586786, 0.07564281));
	conv2d_13_tf += mul(d2, min16float4x4(-0.35524195, -0.08884062, -0.061092835, 0.0016606712, -0.18841584, -0.28330895, 0.110710636, -0.20210983, 0.01599891, -0.019640112, -0.06881855, -0.2822387, 0.16723692, 0.42387784, 0.17316435, 0.014779502));
	conv2d_13_tf += mul(e2, min16float4x4(-0.14352255, 0.2557878, 0.14022757, -0.09769558, -0.08192019, 0.4160667, 0.20182422, -0.29740554, -0.16924635, 0.044684824, -0.21592674, -0.04393559, -0.44846448, -0.2268265, -0.15033214, -0.1552571));
	conv2d_13_tf += mul(f2, min16float4x4(-0.46804324, -0.05876729, 0.0023225946, -0.1399195, -0.12917824, 0.12800436, 0.5672086, 0.42298177, -0.25502345, -0.15043756, -0.010454711, -0.16799574, 0.1695203, 0.31919575, 0.090758204, -0.03608345));
	conv2d_13_tf += mul(g2, min16float4x4(-0.20859653, -0.025662629, 0.11013811, 0.021071844, -0.21565554, -0.08014497, -0.041803278, 0.15999684, -0.09659372, -0.2930284, 0.22263159, -0.058361106, -0.06474458, -0.18948506, -0.22297342, 0.002085207));
	conv2d_13_tf += mul(h2, min16float4x4(-0.25466987, 0.07562997, -0.046997566, 0.01815494, 0.015587753, 0.19885786, 0.17028151, -0.20973559, -0.13089986, 0.056037027, -0.16056974, -0.09570157, 0.36515233, 0.2177508, -0.19389395, 0.042368103));
	conv2d_13_tf += mul(i2, min16float4x4(-0.09177028, 0.029719152, 0.035980605, -0.111842036, -0.14203559, -0.0016779151, -0.23984708, -0.24259119, -0.32218066, -0.16303101, -0.042665064, 0.018674236, 0.132396, -0.07117317, -0.11266681, -0.25140917));
	conv2d_13_tf += mul(a3, min16float4x4(-0.2254921, -0.089444794, -0.03250626, -0.002422312, -0.07599525, 0.06057337, 0.09297158, -0.13625564, -0.05718329, 0.1393445, -0.14238319, -0.035561938, 0.10357985, 0.14509755, -0.05125032, 0.071264446));
	conv2d_13_tf += mul(b3, min16float4x4(0.14345558, -0.13649228, 0.07141237, -0.31665677, -0.106410414, -0.024022767, 0.022847228, -0.066274576, 0.27855787, 0.27377915, 0.100737795, 0.2585287, 0.065262236, 0.3338305, 0.013705893, 0.15107758));
	conv2d_13_tf += mul(c3, min16float4x4(0.24887003, 0.27924842, 0.011750549, 0.02100809, -0.060978264, 0.09022114, 0.10060977, -0.117189266, -0.064989, 0.050291102, 0.02154075, 0.07428455, 0.0128874695, -0.0824151, -0.0955003, 0.1240542));
	conv2d_13_tf += mul(d3, min16float4x4(0.09787086, -0.038460266, -0.012216873, 0.020269781, -0.14274825, -0.10365878, 0.107120685, 0.005830931, 0.18160833, -0.039512586, 0.054537058, -0.10175313, 0.2583083, 0.12110453, 0.11164319, -0.097267024));
	conv2d_13_tf += mul(e3, min16float4x4(0.04403219, -0.06616097, -0.1881836, -0.17728293, -0.30001318, 0.14179994, 0.077847786, 0.009201645, 0.2055038, 0.10847946, 0.034566265, 0.0823046, 0.016860636, -0.029249087, -0.16692844, 0.15714505));
	conv2d_13_tf += mul(f3, min16float4x4(0.01654197, -0.005030059, 0.15659711, 0.029457249, -0.10084003, -0.17541635, 0.20056525, 0.11890777, 0.041007854, -0.021843065, -0.047474306, 0.02461869, 0.09578964, -0.054728534, -0.022951778, 0.1384323));
	conv2d_13_tf += mul(g3, min16float4x4(-0.17401876, 0.0050307186, -0.14960738, -0.06744025, -0.026341015, -0.015185451, 0.097973764, 0.01230041, 0.043848213, -0.022325305, -0.01173514, -0.12744233, -0.1192904, -0.019170178, 0.16593695, -0.11961721));
	conv2d_13_tf += mul(h3, min16float4x4(0.16905174, -0.05465901, -0.10304148, 0.06422409, -0.06595216, 0.032311443, 0.06506821, 0.06866468, 0.12749052, 0.2812222, 0.10223055, -0.009964554, 0.10145132, 0.05452548, 0.21845295, 0.060436632));
	conv2d_13_tf += mul(i3, min16float4x4(0.009076048, -0.075771615, -0.010236168, -0.049228482, -0.009111011, 0.0032872239, 0.030809326, -0.021389242, -0.13207865, -0.20758687, 0.06795314, 0.16716966, 0.022448925, 0.005127875, 0.14822717, 0.1543517));
	conv2d_13_tf += mul(na1, min16float4x4(0.15177163, -0.059483033, -0.061815593, -0.048359588, 0.08666249, 0.01303385, -0.0797276, 0.00045918894, 0.044986565, -0.16032507, 0.001627205, -0.11240742, 0.36031052, 0.3453977, 0.20082399, -0.0872419));
	conv2d_13_tf += mul(nb1, min16float4x4(0.20732729, 0.29925603, -0.027490204, -0.07813189, -0.16492629, 0.13322815, 0.0031292376, 0.041497275, -0.08320837, 0.10200068, -0.17992872, -0.06903506, 0.12075557, 0.10240156, 0.17685287, -0.07302424));
	conv2d_13_tf += mul(nc1, min16float4x4(0.09832397, 0.072229534, -0.038651302, 0.23663157, -0.049382553, -0.11816951, -0.095177956, -0.0063895187, -0.22133054, 0.027618079, -0.010867105, 0.20221426, -0.055768233, 0.123813964, -0.04770652, 0.031318672));
	conv2d_13_tf += mul(nd1, min16float4x4(0.19019139, -0.0055707553, 0.26110023, 0.053353935, -0.09224678, 0.103274055, 0.054068115, -0.028470352, -0.050636273, 0.044128064, -0.1790452, -0.01937518, -0.22987902, 0.13224003, 0.06837358, -0.21524249));
	conv2d_13_tf += mul(ne1, min16float4x4(-0.06441057, -0.14875272, 0.1966193, -0.19311902, 0.116319604, -0.015221862, 0.22331011, -0.12665007, 0.1492529, -0.060963593, -0.13987945, -0.00267954, 0.17791282, -0.017524656, 0.009128157, -0.19969128));
	conv2d_13_tf += mul(nf1, min16float4x4(-0.15118724, -0.02174076, 0.18955654, 0.004134554, -0.074481554, -0.022116778, -0.23729491, -0.21471047, -0.17819612, 0.13824348, -0.0189012, 0.2410327, 0.122907236, 0.115833536, 0.07078602, 0.1497625));
	conv2d_13_tf += mul(ng1, min16float4x4(-0.0030512493, -0.004724951, 0.13259876, 0.009409425, -0.09696517, -0.12920079, -0.13467522, -0.05229473, -0.03711706, -0.038291495, -0.1493357, 0.09193146, -0.11654958, -0.1384159, -0.0809269, 0.12138653));
	conv2d_13_tf += mul(nh1, min16float4x4(-0.111716144, -0.033208963, 0.19639781, -0.28904846, 0.043729085, 0.016957026, -0.078926295, -0.19079417, 0.06363828, -0.019629745, 0.058766138, -0.120303996, -0.15203112, -0.16788657, -0.15019903, -0.20598294));
	conv2d_13_tf += mul(ni1, min16float4x4(0.09737031, 0.19906493, 0.31577814, 0.09887659, -0.10737645, 0.03927124, 0.008865094, 0.030515334, -0.03767332, 0.19419806, 0.052343797, -0.12595782, 0.018560758, -0.004252203, 0.12685028, -0.19064935));
	conv2d_13_tf += mul(na2, min16float4x4(-0.107926846, 0.05654491, 0.039178263, -0.022938857, -0.055884767, 0.01403891, 0.040060706, -0.0876108, -0.08530536, 0.035486717, -0.1397322, -0.111439094, 0.3098693, 0.031957068, -0.1323169, 0.036736827));
	conv2d_13_tf += mul(nb2, min16float4x4(-0.042637993, -0.13947937, -0.06313642, -0.013281999, -0.07746704, -0.0033614477, 0.062081654, -0.028974544, -0.09252038, 0.23787987, -0.03051402, 0.08857487, -0.10345242, 0.08111023, 0.012858327, 0.025468932));
	conv2d_13_tf += mul(nc2, min16float4x4(-0.057991188, 0.06572571, -0.17195612, -0.18226011, 0.13167764, -0.029910656, 0.07416073, 0.011874738, 0.020921603, 0.1790944, -0.02713754, -0.04678265, 0.0025504003, -0.07831189, 0.0022889362, 0.17452945));
	conv2d_13_tf += mul(nd2, min16float4x4(-0.08273035, -0.06628758, 0.09288723, 0.17525311, -0.015099176, -0.02920585, 0.01664239, 0.16360165, -0.058821842, 0.023668878, 0.13803177, 0.05805197, -0.033553623, -0.020296576, -0.2126249, 0.054712847));
	conv2d_13_tf += mul(ne2, min16float4x4(0.11607657, 0.09721635, 0.076664194, 0.107737765, -0.18090104, -0.09323497, 0.1018825, 0.025112988, -0.037965916, 0.07314205, 0.16523585, -0.16451308, 0.011332593, 0.05381852, 0.053742763, -0.051402804));
	conv2d_13_tf += mul(nf2, min16float4x4(0.08998201, -0.09690652, -0.090980336, 0.21645999, -0.1421605, 0.017344419, -0.080088496, -0.1686495, 0.13406368, 0.004237983, 0.028970357, -0.015848784, -0.07229926, -0.08199748, 0.14972275, 0.11688227));
	conv2d_13_tf += mul(ng2, min16float4x4(-0.10923993, -0.006186229, -0.0059918985, -0.056261536, 0.12305135, 0.07601222, 0.015556293, 0.039497726, 0.004694121, 0.03006972, -0.11686323, -0.1083031, -0.053210545, 0.06765771, 0.1847543, 0.12722884));
	conv2d_13_tf += mul(nh2, min16float4x4(-0.15110816, -0.114151604, 0.06755774, 0.1535812, -0.0055134855, 0.124444366, 0.116650686, 0.015837835, -0.13255565, -0.023659749, 0.012672263, -0.014328633, -0.25721112, 0.03517644, 0.07895924, 0.017762167));
	conv2d_13_tf += mul(ni2, min16float4x4(-0.0048434106, -0.15848884, 0.07007013, -0.0040173456, 0.12461628, -0.006840197, 0.054776177, 0.030113375, 0.011075732, -0.12137928, 0.039907288, 0.041261338, -0.03539033, -0.010571816, 0.17591824, 0.07626049));
	conv2d_13_tf += mul(na3, min16float4x4(-0.09215494, -0.047397707, 0.020372266, -0.03961589, -0.2969749, -0.23441714, 0.041512486, -0.23838238, 0.15105574, 0.030688843, 0.10364508, -0.037372112, 0.24514282, 0.11799978, -0.25672802, -0.05064504));
	conv2d_13_tf += mul(nb3, min16float4x4(-0.22321941, -0.22637981, 0.12784286, -0.15949993, -0.1747607, 0.019964136, -0.101212226, -0.14332725, -0.0040852833, 0.13991846, -0.121760346, -0.074741244, -0.14598946, 0.017030315, -0.21471639, 0.023562988));
	conv2d_13_tf += mul(nc3, min16float4x4(-0.025941253, -0.085331805, 0.006736805, 0.080889955, -0.06974209, -0.20366986, -0.2243817, -0.18153073, -0.0024152526, 0.047323234, 0.03407195, 0.016644841, -0.0060426793, -0.1146607, 0.11816627, -0.09477427));
	conv2d_13_tf += mul(nd3, min16float4x4(-0.11221949, -0.016993113, -0.028873868, 0.30510077, -0.10090775, -0.56358117, -0.2178131, -0.3253011, 0.05903533, 0.23069671, -0.040006876, -0.2242038, -0.10916342, -0.038909998, -0.081489064, 0.06539624));
	conv2d_13_tf += mul(ne3, min16float4x4(-0.059550002, -0.07048971, 0.08075795, 0.07341893, 0.08720143, -0.08745607, -0.28628471, 0.004085622, -0.059510656, -0.07080941, -0.17805275, 0.010445313, 0.08262345, 0.14971328, 0.086313516, 0.4270992));
	conv2d_13_tf += mul(nf3, min16float4x4(-0.25829327, -0.25821465, -0.025910528, -0.1256417, -0.32173184, -0.012251011, -0.31182033, -0.17723739, 0.05439974, -0.0018167618, 0.06974409, -0.024687098, 0.05163715, 0.011181801, 0.060559656, 0.18320788));
	conv2d_13_tf += mul(ng3, min16float4x4(0.048055783, 0.030901788, 0.00014199098, -0.015663194, -0.27395675, -0.1374474, 0.055429243, 0.09942114, -0.037852254, -0.033255827, 0.022523645, 0.04666904, 0.16599222, -0.02004086, 0.21397619, -0.11373404));
	conv2d_13_tf += mul(nh3, min16float4x4(-0.23445702, -0.06371413, -0.08418856, 0.06907252, 0.20780656, -0.13808912, 0.018577656, -0.0046262434, 0.09724245, -0.114031695, 0.022883652, 0.107561804, -0.010228, 0.0033352477, 0.12142382, -0.035946723));
	conv2d_13_tf += mul(ni3, min16float4x4(0.058773417, -0.06617424, -0.13876313, -0.007238876, -0.17449926, 0.14130935, -0.17021981, 0.09241347, 0.018518088, 0.085447155, -0.14430992, 0.035074715, -0.02784563, 0.15934117, -0.00036379634, -0.040411446));
	conv2d_13_tf += min16float4(-0.0258258, -0.014007201, -0.0051976936, 0.023554644);
	min16float4 nconv2d_13_tf = max(-conv2d_13_tf, 0);
	conv2d_13_tf = max(conv2d_13_tf, 0);

	min16float4 target = mul(e1, min16float4x4(0.13381699, 0.17966591, -0.0866034, -0.15282217, -0.2567282, -0.38080183, 0.10091161, 0.32172382, -0.064547606, -0.08161712, -0.033353675, -0.0019234467, 0.027740227, 0.2277078, 0.06759129, -0.22699283));
	target += mul(e2, min16float4x4(-0.122093834, 0.20621717, -0.08142724, 0.16477586, 0.4863212, -0.24032472, 0.00055996195, 0.50562304, 0.028121283, 0.56215876, 0.014577866, 0.06960302, -0.15964645, 0.14526807, -0.026474794, -0.02554081));
	target += mul(e3, min16float4x4(-0.101622745, 0.022395104, -0.14208415, 0.09508211, 0.20496333, 0.11371943, -0.024784304, 0.09519364, 0.09233463, 0.03117482, -0.15262024, -0.16956648, -0.2432608, -0.12877996, -0.13148616, 0.043081667));
	target += mul(ne1, min16float4x4(-0.28086182, -0.15846887, -0.058738094, -0.181707, -0.018847898, 0.05197007, 0.09753647, -0.19714034, -0.062462445, -0.17604835, 0.1268098, 0.15334699, 0.05568127, 0.16867611, -0.1686486, 0.28579247));
	target += mul(ne2, min16float4x4(0.20252296, -0.27393097, 0.06578763, -0.12628423, -0.10547165, 0.030740904, -0.19412865, -0.034658667, -0.09081653, -0.19958268, 0.16915733, 0.056093715, 0.10596871, -0.1742866, 0.004890009, 0.19515324));
	target += mul(ne3, min16float4x4(0.32077652, -0.004434404, -0.12717858, -0.13544025, -0.450333, 0.04072708, 0.04316467, -0.2578049, -0.011932833, 0.18828999, 0.12326536, -0.016795376, -0.0054118615, 0.061453808, 0.28015187, 0.13463841));
	target += mul(conv2d_11_tf, min16float4x4(0.08942177, -0.0021343376, 0.23693596, -0.15413974, -0.32839566, -0.010874302, 0.033822935, 0.038676813, 0.18920816, 0.019961799, -0.055697896, -0.042120066, 0.10387084, 0.047366753, 0.17899887, -0.071130194));
	target += mul(nconv2d_11_tf, min16float4x4(0.0010777018, -0.071475126, -0.16156957, -0.08781234, -0.08701292, 0.29084647, -0.34587428, 0.06969663, 0.036580127, 0.106745, -0.1534462, 0.106189206, -0.22758242, 0.20691736, -0.018554503, -0.056773946));
	target += mul(conv2d_1_tf, min16float4x4(0.14826776, -0.03700497, 0.066144, 0.023859248, -0.16708666, -0.23908418, 0.062023632, -0.16278005, 0.06265635, -0.039846748, -0.13978398, -0.027952245, 0.099891245, 0.18235108, 0.00991435, 0.0423486));
	target += mul(nconv2d_1_tf, min16float4x4(-0.17948383, -0.082759954, 0.10543674, -0.18660031, 0.0664088, -0.06837087, 0.04300318, 0.011699623, -0.017162412, -0.030628186, 0.07547453, 0.20060332, -0.19182351, 0.04914753, 0.040280227, -0.12417484));
	target += mul(conv2d_4_tf, min16float4x4(0.04074336, -0.041421015, -0.0372822, 0.1647266, -0.13993263, 0.0029407872, -0.39398977, -0.1778468, 0.21322449, 0.19134948, -0.02818874, 0.226251, 0.06352273, 0.12620094, 0.24221466, 0.20657893));
	target += mul(nconv2d_4_tf, min16float4x4(-0.094572894, -0.046852108, 0.21210444, -0.14082888, -0.050984625, -0.13443558, 0.24309658, 0.1573335, 0.21941295, 0.11642813, 0.09684106, -0.08597462, 0.15502413, -0.018070435, 0.1292023, -0.1557655));
	target += mul(conv2d_7_tf, min16float4x4(0.025215387, 0.16676718, -0.068287216, 0.017648363, 0.2779579, 0.059142746, -0.096408874, 0.22609432, 0.20962398, 0.24879578, 0.023621194, -0.29692242, 0.02272032, -0.33367038, 0.15799981, -0.1699598));
	target += mul(nconv2d_7_tf, min16float4x4(0.08816878, 0.076234445, -0.06670541, 0.024926793, -0.12045598, 0.07443171, 0.22081238, -0.044906516, -0.02448027, -0.22067828, -0.016471038, 0.21801811, 0.16276583, 0.34590468, -0.18487914, 0.0554853));
	target += mul(conv2d_10_tf, min16float4x4(-0.085593045, -0.002904318, 0.049969394, -0.06931361, -0.10722648, -0.08499641, -0.25997344, 0.22650665, 0.069008924, -0.23179024, 0.20058884, -0.20237185, -0.1606995, 0.0758858, -0.09946377, -0.21032207));
	target += mul(nconv2d_10_tf, min16float4x4(0.11210572, 0.055658836, 0.041539114, 0.078087114, -0.060435783, 0.08331363, 0.07356019, 0.0842336, -0.38098484, 0.020591227, -0.45916042, 0.06386686, -0.19348675, 0.041925576, -0.23489946, -0.06711732));
	target += mul(conv2d_13_tf, min16float4x4(-0.13721304, 0.15404533, 0.102312036, -0.090253755, 0.08690545, 0.034154307, 0.07618604, -0.15844443, -0.10604342, 0.2646684, -0.08719668, 0.19331944, 0.10569642, -0.058054388, -0.0110980645, -0.08710107));
	target += mul(nconv2d_13_tf, min16float4x4(0.15567884, -0.11589786, 0.031855986, 0.005064268, 0.37850487, 0.30044487, -0.2604449, 0.061879188, -0.015081224, -0.30759993, -0.07571204, -0.0077929585, -0.08748009, 0.22546281, -0.06377379, 0.435342));
	target += min16float4(0.0053140894, -0.030208405, 0.04287835, -0.059097543);
	tex5[gxy] = target;
	
	target = mul(e1, min16float4x4(0.0029025443, 0.021165721, 0.0070854356, 0.065646365, 0.024636142, 0.20825955, -0.0917655, -0.1706138, -0.1827491, 0.13347003, 0.12910214, 0.06828513, -0.026193604, -0.11451178, 0.0356333, -0.08071165));
	target += mul(e2, min16float4x4(-0.027241195, 0.032633994, -0.17490302, -0.5352789, -0.15734912, 0.24714436, 0.029301014, 0.212763, -0.051665317, -0.06783505, -0.040298667, 0.041179724, 0.49683514, -0.35600296, -0.2518442, -0.22965558));
	target += mul(e3, min16float4x4(-0.061614696, -0.10463926, 0.1594845, 0.036565617, 0.09095015, -0.15100475, -0.09242749, 0.08335822, -0.027257469, 0.4156707, 0.03322028, 0.19685929, 0.07034635, 0.10204465, 0.03657313, 0.30920812));
	target += mul(ne1, min16float4x4(-0.20980133, -0.054115582, 0.031674277, -0.040077273, -0.21693806, 0.016596884, -0.029177245, -0.16924128, 0.121823296, -0.0004884774, 0.10644538, 0.068388954, 0.16517027, -0.12152921, -0.18299894, -0.17595083));
	target += mul(ne2, min16float4x4(-0.0006413291, -0.09444853, 0.15260176, 0.23014128, 0.09366626, 0.06947763, 0.04956597, -0.07001088, -0.075523324, 0.16111156, -0.11700089, 0.14528704, -0.096407495, 0.027310526, -0.03946532, 0.15302157));
	target += mul(ne3, min16float4x4(0.086061105, -0.0070365844, -0.25230658, 0.18741103, -0.36380208, -0.058444727, 0.25284684, -0.26617825, -0.08817363, -0.12209333, 0.011920746, -0.031505488, -0.21880315, 0.16762236, 0.14518112, 0.13803998));
	target += mul(conv2d_11_tf, min16float4x4(-0.17088315, -0.06812898, -0.085912764, 0.25550255, -0.26439053, 0.23305506, 0.18186118, -0.06186191, 0.0075220955, 0.10316868, 0.04271979, -0.008083033, -0.19474187, -0.06700431, 0.15485007, -0.11886802));
	target += mul(nconv2d_11_tf, min16float4x4(0.06597312, -0.31435877, -0.08179224, -0.2568261, 0.29904976, 0.21664406, -0.15343861, -0.11589945, 0.12654455, -0.042093027, -0.17231914, -0.26832506, -0.12008876, 0.11483079, 0.10222754, 0.12562539));
	target += mul(conv2d_1_tf, min16float4x4(-0.09949413, 0.01479024, -0.16933955, 0.025359191, -0.2210058, -0.19663176, 0.19453603, -0.111461386, -0.12529027, 0.14243664, 0.122677036, -0.101476125, 0.011010597, -0.014422488, -0.048979994, 0.03657997));
	target += mul(nconv2d_1_tf, min16float4x4(-0.06923051, -0.1223873, 0.021781938, 0.1323696, -0.11582021, -0.018292433, 0.07495496, 0.043008957, 0.0070410958, -0.14431225, -0.06380941, -0.17411429, 0.052226365, 0.021460915, 0.097367965, 0.37138346));
	target += mul(conv2d_4_tf, min16float4x4(0.16420697, 0.008790036, 0.17185563, -0.025144322, -0.108827055, -0.13030754, -0.14254087, 0.05208047, 0.03751449, 0.06774824, -0.07746288, 0.2250457, 0.039049506, 0.101244815, -0.18138403, -0.12212992));
	target += mul(nconv2d_4_tf, min16float4x4(-0.05138809, 0.19150224, 0.05698308, 0.015970863, 0.23931703, -0.085039265, -0.18294281, 0.03647365, -0.041568805, -0.2920049, 0.013272974, -0.41181135, -0.08101046, 0.028989056, 0.2952233, 0.16312017));
	target += mul(conv2d_7_tf, min16float4x4(0.093839854, -0.038790308, -0.086285874, -0.17890124, -0.2598202, 0.069419555, -0.0065180454, 0.01453452, -0.090191156, 0.012278203, -0.13148692, -0.025104592, 0.09296121, -0.1833281, 0.074660525, -0.031280298));
	target += mul(nconv2d_7_tf, min16float4x4(-0.05336347, 0.08608969, -0.074649446, 0.014608438, 0.22511393, 0.18610351, -0.0029040743, 0.096127085, -0.20254624, 0.14036441, -0.005226189, 0.055212848, 0.20482111, 0.06645607, -0.12018032, 0.062814355));
	target += mul(conv2d_10_tf, min16float4x4(0.13722958, -0.077169575, 0.07269382, 0.20902501, -0.103985704, -0.21184038, -0.12424109, -0.3059887, -0.185413, -0.1964241, -0.14370187, 0.07646031, -0.057924826, 0.28884047, -0.06701312, -0.14548934));
	target += mul(nconv2d_10_tf, min16float4x4(0.14129579, 0.12990993, -0.08791828, 0.07986884, -0.006362554, 0.005971629, 0.016816271, 0.075642705, -0.060138028, 0.13658188, 0.0020529197, -0.38745758, -0.16191563, 0.20532359, 0.34441018, 0.0071060034));
	target += mul(conv2d_13_tf, min16float4x4(-0.03236983, -0.08242242, 0.065607354, -0.072457135, 0.024461512, 0.15522943, 0.120296456, 0.052112654, 0.21442589, 0.19565494, 0.06760742, 0.37604833, 0.097620994, -0.002347599, 0.09269131, -0.34238556));
	target += mul(nconv2d_13_tf, min16float4x4(0.3276042, -0.17974046, -0.095954694, -0.123248585, 0.08306674, -0.3486506, -0.4620704, -0.40518835, -0.17438394, 0.24350463, 0.05616052, -0.14715664, 0.2078043, -0.007834002, -0.21199054, 0.026597755));
	target += min16float4(-0.015380624, 0.018387195, 0.052286647, 0.055403516);
	tex6[gxy] = target;
	
	target = mul(e1, min16float4x4(0.029018598, -0.09923186, -0.1346201, -0.084818475, 0.013764684, 0.054601744, -0.023713779, -0.16826102, 0.038605224, -0.17664196, -0.16562279, 0.14602208, -0.046339583, 0.08062112, 0.20166601, -0.15399997));
	target += mul(e2, min16float4x4(-0.022488657, 0.28881705, 0.22283012, -0.1935156, 0.22948948, -0.26604095, 0.12130448, 0.35176682, -0.044228308, -0.14734231, 0.07643742, -0.008511517, 0.04313213, -0.03179344, 0.048205808, -0.046295088));
	target += mul(e3, min16float4x4(-0.2531207, 0.10446124, 0.12730333, -0.13316457, 0.2988587, 0.025091104, -0.00482534, 0.037484948, -0.04006528, 0.14588606, -0.2078635, -0.18636562, 0.112230495, 0.15386717, -0.11122423, 0.1115416));
	target += mul(ne1, min16float4x4(0.058421213, 0.086035125, -0.042249937, -0.22377387, -0.055913106, 0.020280339, 0.10572877, 0.124147646, -0.16199678, 0.25662583, 0.051422223, -0.11681551, 0.3789257, -0.21530285, -0.18586366, -0.2222266));
	target += mul(ne2, min16float4x4(-0.11123776, 0.056422785, -0.20566264, -0.07211227, -0.011873865, 0.30742383, 0.1306618, 0.06808572, 0.068643585, -0.045474447, -0.11596973, 0.0069175013, 0.0331586, -0.013221628, -0.089815594, -0.17750767));
	target += mul(ne3, min16float4x4(0.45630908, 0.11607409, -0.05464286, 0.013246808, -0.28643015, 0.025237702, -0.1445959, 0.05237954, -0.07100623, -0.34417382, 0.13903524, 0.21305767, -0.17371523, -0.13203263, -0.09479281, 0.018392125));
	target += mul(conv2d_11_tf, min16float4x4(-0.018931253, -0.14936836, -0.06770882, 0.10720343, -0.10476732, 0.1157603, -0.2245781, 0.23242487, -0.21631289, 0.12723672, 0.4190526, 0.38829032, -0.192142, 0.034754496, -0.1103798, -0.17207326));
	target += mul(nconv2d_11_tf, min16float4x4(0.10311498, 0.08424212, -0.048713315, -0.2784966, 0.034522116, -0.13184515, -0.22852737, 0.003882436, 0.36972147, -0.21263883, -0.3308556, 0.10331102, 0.2462766, -0.12618823, -0.040451203, 0.03362719));
	target += mul(conv2d_1_tf, min16float4x4(-0.0150432745, 0.11757923, 0.23359092, -0.19003578, -0.22206408, 0.15738077, -0.14019541, -0.14201044, 0.19273758, -0.003298494, -0.16530107, 0.17979017, 0.24293105, -0.049160067, -0.14296743, -0.12812854));
	target += mul(nconv2d_1_tf, min16float4x4(-0.0020534277, 0.016410163, -0.012038507, -0.0028629426, 0.016464395, 0.0755886, 0.20384903, -0.029324949, -0.13087441, 0.2138074, 0.03701677, -0.1671415, -0.10499825, -0.042930905, -0.007613907, -0.05984843));
	target += mul(conv2d_4_tf, min16float4x4(-0.07029106, 0.05386552, 0.101365924, -0.008048512, -0.090149835, 0.024272785, -0.16436198, 0.2721913, 0.17460534, 0.0034964401, -0.023265982, -0.0120567605, -0.10151709, 0.059922412, -0.13204409, -0.36116782));
	target += mul(nconv2d_4_tf, min16float4x4(-0.12569033, 0.08523279, -0.047763485, -0.0025170774, -0.108375974, -0.032045245, 0.232404, -0.24801816, -0.09875204, -0.14990453, -0.10958757, -0.23116525, 0.015989894, -0.09210713, 0.19653663, 0.14138049));
	target += mul(conv2d_7_tf, min16float4x4(0.17831743, 0.04722249, 0.22804007, -0.29099363, 0.29851902, 0.2542661, 0.0067702304, 0.17606215, 0.25847578, -0.3118978, 0.122089565, -0.07010249, 0.014281751, 0.16585219, -0.1659864, -0.30643156));
	target += mul(nconv2d_7_tf, min16float4x4(0.19042191, -0.028259574, -0.009187334, 0.21004388, -0.08070036, -0.07838277, -0.023598602, 0.13891627, -0.10481482, 0.05874796, -0.256131, 0.19640857, 0.19515458, -0.07920633, 0.020810237, 0.11040215));
	target += mul(conv2d_10_tf, min16float4x4(-0.093089096, -0.09344762, 0.24232084, 0.21563776, -0.23910145, 0.09092736, 0.12202717, 0.27240792, -0.008079913, 0.07417433, -0.11870247, -0.35385913, 0.107840456, 0.033915944, 0.16016287, 0.023731219));
	target += mul(nconv2d_10_tf, min16float4x4(0.21967673, 0.09896617, 0.04236673, -0.20100762, 0.02077549, -0.075936705, 0.008608214, -0.09693712, 0.44249, -0.31763947, -0.027664369, 0.6166134, -0.43993565, -0.025720617, -0.3275949, 0.041507874));
	target += mul(conv2d_13_tf, min16float4x4(0.20305479, -0.06975863, -0.18130508, -0.11641104, 0.119906515, -0.27588886, -0.15420493, -0.1399163, 0.075970694, -0.16776691, 0.05045285, 0.44775927, -0.036058784, -0.28161573, 0.1877619, 0.10209392));
	target += mul(nconv2d_13_tf, min16float4x4(-0.4250348, -0.007887921, 0.307136, -0.18842702, 0.30411714, 0.05816079, 0.26664746, -0.007951849, -0.18454021, 0.30914694, -0.34967366, -0.18838291, 0.06042888, 0.1902336, -0.062413342, 0.015706044));
	target += min16float4(-0.0011628491, -0.0046341973, 0.0007886035, -0.04435556);
	tex7[gxy] = target;
}

//!PASS 7
//!DESC Conv-4x3x3x24
//!IN tex5, tex6, tex7
//!OUT conv1ups, conv1ups1
//!BLOCK_SIZE 8
//!NUM_THREADS 64

void Pass7(uint2 blockStart, uint3 threadId) {
	uint2 gxy = Rmp8x8(threadId.x) + blockStart;
	
	const uint2 outputSize = GetOutputSize();
	if (gxy.x >= outputSize.x || gxy.y >= outputSize.y) {
		return;
	}

	float2 outputPt = GetOutputPt();
	const float2 pos = (gxy + 0.5f) * outputPt;
	
	outputPt *= 2;

	// [ a, d, g ]
	// [ b, e, h ]
	// [ c, f, i ]
	min16float4 a1 = tex5.SampleLevel(sam1, pos - outputPt, 0);
	min16float4 b1 = tex5.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0);
	min16float4 c1 = tex5.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0);
	min16float4 d1 = tex5.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0);
	min16float4 e1 = tex5.SampleLevel(sam1, pos, 0);
	min16float4 f1 = tex5.SampleLevel(sam1, pos + float2(0, outputPt.y), 0);
	min16float4 g1 = tex5.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0);
	min16float4 h1 = tex5.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0);
	min16float4 i1 = tex5.SampleLevel(sam1, pos + outputPt, 0);

	min16float4 na1 = max(-a1, 0);
	min16float4 nb1 = max(-b1, 0);
	min16float4 nc1 = max(-c1, 0);
	min16float4 nd1 = max(-d1, 0);
	min16float4 ne1 = max(-e1, 0);
	min16float4 nf1 = max(-f1, 0);
	min16float4 ng1 = max(-g1, 0);
	min16float4 nh1 = max(-h1, 0);
	min16float4 ni1 = max(-i1, 0);

	a1 = max(a1, 0);
	b1 = max(b1, 0);
	c1 = max(c1, 0);
	d1 = max(d1, 0);
	e1 = max(e1, 0);
	f1 = max(f1, 0);
	g1 = max(g1, 0);
	h1 = max(h1, 0);
	i1 = max(i1, 0);
	
	min16float4 a2 = tex6.SampleLevel(sam1, pos - outputPt, 0);
	min16float4 b2 = tex6.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0);
	min16float4 c2 = tex6.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0);
	min16float4 d2 = tex6.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0);
	min16float4 e2 = tex6.SampleLevel(sam1, pos, 0);
	min16float4 f2 = tex6.SampleLevel(sam1, pos + float2(0, outputPt.y), 0);
	min16float4 g2 = tex6.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0);
	min16float4 h2 = tex6.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0);
	min16float4 i2 = tex6.SampleLevel(sam1, pos + outputPt, 0);

	min16float4 na2 = max(-a2, 0);
	min16float4 nb2 = max(-b2, 0);
	min16float4 nc2 = max(-c2, 0);
	min16float4 nd2 = max(-d2, 0);
	min16float4 ne2 = max(-e2, 0);
	min16float4 nf2 = max(-f2, 0);
	min16float4 ng2 = max(-g2, 0);
	min16float4 nh2 = max(-h2, 0);
	min16float4 ni2 = max(-i2, 0);

	a2 = max(a2, 0);
	b2 = max(b2, 0);
	c2 = max(c2, 0);
	d2 = max(d2, 0);
	e2 = max(e2, 0);
	f2 = max(f2, 0);
	g2 = max(g2, 0);
	h2 = max(h2, 0);
	i2 = max(i2, 0);
	
	min16float4 a3 = tex7.SampleLevel(sam1, pos - outputPt, 0);
	min16float4 b3 = tex7.SampleLevel(sam1, pos + float2(-outputPt.x, 0), 0);
	min16float4 c3 = tex7.SampleLevel(sam1, pos + float2(-outputPt.x, outputPt.y), 0);
	min16float4 d3 = tex7.SampleLevel(sam1, pos + float2(0, -outputPt.y), 0);
	min16float4 e3 = tex7.SampleLevel(sam1, pos, 0);
	min16float4 f3 = tex7.SampleLevel(sam1, pos + float2(0, outputPt.y), 0);
	min16float4 g3 = tex7.SampleLevel(sam1, pos + float2(outputPt.x, -outputPt.y), 0);
	min16float4 h3 = tex7.SampleLevel(sam1, pos + float2(outputPt.x, 0), 0);
	min16float4 i3 = tex7.SampleLevel(sam1, pos + outputPt, 0);

	min16float4 na3 = max(-a3, 0);
	min16float4 nb3 = max(-b3, 0);
	min16float4 nc3 = max(-c3, 0);
	min16float4 nd3 = max(-d3, 0);
	min16float4 ne3 = max(-e3, 0);
	min16float4 nf3 = max(-f3, 0);
	min16float4 ng3 = max(-g3, 0);
	min16float4 nh3 = max(-h3, 0);
	min16float4 ni3 = max(-i3, 0);

	a3 = max(a3, 0);
	b3 = max(b3, 0);
	c3 = max(c3, 0);
	d3 = max(d3, 0);
	e3 = max(e3, 0);
	f3 = max(f3, 0);
	g3 = max(g3, 0);
	h3 = max(h3, 0);
	i3 = max(i3, 0);

	min16float4 target = mul(a1, min16float4x4(-0.04461327, 0.026094772, -0.04326873, -0.022564206, 0.041758694, -0.13209347, -0.022546854, 0.004888482, 0.041229382, 0.078778535, -0.09928822, 0.045134705, 0.07555903, 0.095968306, 0.017260674, -0.16633268));
	target += mul(b1, min16float4x4(0.074613005, -0.024822153, 0.006285665, 0.064223155, 0.08983999, -0.04401517, 0.0021585347, -0.05762909, -0.04529031, -0.081778474, -0.006732511, -0.11184791, 0.10299652, -0.23328288, 0.15988354, 0.100146465));
	target += mul(c1, min16float4x4(0.035105877, -0.0018613822, -0.10513717, -0.033936206, -0.015839642, 0.036846053, 0.057443213, 0.0151035935, 0.073372714, -0.032272663, -0.10095864, 0.11976275, 0.019719468, -0.03309878, -0.09841568, 0.02204194));
	target += mul(d1, min16float4x4(0.030945469, -0.17030734, -0.012849732, 0.015892556, 0.056250833, 0.24895169, -0.13764419, 0.16325791, -0.01160465, 0.006647464, -0.026491588, -0.17801395, -0.02435574, -0.2039599, -0.02686966, -0.026576484));
	target += mul(e1, min16float4x4(-0.037470777, 0.019415256, 0.09230313, 0.018368619, 0.12947397, 0.055918667, 0.03108532, -0.112716034, -0.18622373, 0.13083778, 0.11290179, 0.02457941, -0.055062827, 0.2621282, -0.47111708, 0.14229195));
	target += mul(f1, min16float4x4(-0.028525796, -0.044668507, 0.0581049, -0.05924212, -0.16126277, 0.02257456, -0.08723546, 0.0291216, 0.08648604, 0.1816661, -0.10166446, 0.054426763, -0.049978323, -0.014283805, 0.08187003, -0.33347195));
	target += mul(g1, min16float4x4(0.0660737, 0.07962152, -0.08272859, 0.06791631, -0.055610694, -0.04899803, 0.001302826, 0.034116816, 0.055754438, -0.090739936, -0.058503445, -0.21402411, 0.08279316, -0.017558504, -0.06069706, -0.009158945));
	target += mul(h1, min16float4x4(0.004801658, 0.19979613, -0.12919085, -0.08680655, -0.07869315, 0.13493058, 0.09466464, -0.06683993, 0.45278597, -0.031217117, -0.36346734, 0.007986247, -0.034918886, -0.06899428, -0.17898467, 0.048572816));
	target += mul(i1, min16float4x4(-0.058319356, -0.11041357, -0.038064227, 0.008961388, 0.059284043, -0.006377162, -0.08503998, 0.08246113, -0.042524133, -0.009021081, -0.06406861, -0.036977306, 0.015088326, 0.007376721, 0.045255665, -0.048585415));
	target += mul(a2, min16float4x4(-0.04103631, -0.041285936, 0.032812588, 0.0030869239, 0.04834749, -0.0023517366, 0.01230978, 0.09776701, 0.08415344, 0.20653047, -0.19338459, -0.04812796, -0.084704414, 0.038988277, 0.075450994, -0.08053876));
	target += mul(b2, min16float4x4(0.13506958, -0.2392332, 0.07425533, -0.05262753, -0.06849319, -0.0686977, 0.09134643, 0.032770213, 0.0725978, -0.12106999, 0.068602145, 0.0030026592, -0.0808173, 0.06421806, -0.08257931, 0.21460927));
	target += mul(c2, min16float4x4(-0.008367152, 0.0035576785, -0.012087096, -0.08389121, -0.01598755, 0.12065467, 0.099018045, -0.14851409, 0.030730573, 0.028257858, -0.08153201, -0.08644078, -0.114632666, -0.03989634, 0.005787138, -0.080551155));
	target += mul(d2, min16float4x4(0.063049294, -0.13418451, -0.020768259, -0.12566003, -0.038050238, 0.024393935, 0.040856704, -0.10639481, -0.0021406382, 0.12272091, 0.039621927, 0.009142157, -0.12273027, 0.06595554, 0.03680899, -0.045653462));
	target += mul(e2, min16float4x4(0.14783141, 0.062921695, -0.2287169, 0.17810576, 0.12781417, -0.23455006, 0.08652726, -0.05671725, -0.0154688135, -0.0757278, 0.028468473, -0.055354204, 0.3387407, 0.06741395, -0.21965146, 0.28021505));
	target += mul(f2, min16float4x4(0.12927511, -0.083112024, -0.026347974, 0.11680802, -0.046030812, 0.04145888, 0.029390097, 0.07615963, 0.21023202, 0.015840504, -0.03812723, -0.03267151, -0.03871269, -0.009839764, 0.09856007, -0.07423972));
	target += mul(g2, min16float4x4(0.017651597, 0.020432748, 0.1884304, -0.004845205, 0.009974344, -0.022273665, 0.03930962, -0.035542846, 0.036834106, 0.14699532, -0.099249355, 0.10607033, -0.027745333, -0.0970868, 0.114169724, -0.023726419));
	target += mul(h2, min16float4x4(-0.028299367, -0.15123722, -0.00423565, 0.06813279, 0.00024022427, -0.025944803, 0.022504266, -0.08420193, -0.20596851, -0.1337249, 0.1062062, -0.01428787, 0.014752737, -0.012875446, 0.030165028, 0.035561644));
	target += mul(i2, min16float4x4(-0.09437882, 0.088986255, -0.019357264, -0.07609514, -0.11045937, -0.09335526, 0.0051609105, 0.046330493, -0.102482855, 0.16320266, -0.07661479, 0.033833966, -0.06805305, 0.051780142, -0.015298791, 0.010972507));
	target += mul(a3, min16float4x4(0.0022961323, 0.10782266, -0.06649802, -0.006361161, -0.13554603, 0.032311134, 0.01145253, -0.018523335, -0.051428523, -0.0073554716, -0.11821805, -0.0227195, -0.06375, 0.029970335, -0.038386237, -0.046592798));
	target += mul(b3, min16float4x4(-0.0839258, -0.0200528, 0.004925492, -0.035113, 0.08860089, 0.052822098, -0.16518101, -0.052028593, 0.042811155, 0.13656183, 0.06579406, -0.26585788, -0.00531827, -0.12001242, -0.07681884, -0.021055153));
	target += mul(c3, min16float4x4(0.0678669, 0.038901877, -0.096601896, -0.081621505, 0.0028282998, -0.04645044, 0.04284913, 0.015117329, 0.104568556, 0.006391826, -0.021010842, -0.036205173, 0.06698969, 0.08495347, 0.065073915, 0.07002784));
	target += mul(d3, min16float4x4(-0.041274223, -0.065267585, 0.0070607257, -0.067357324, 0.056948107, 0.04808867, 0.07966329, -0.017361488, 0.030913807, -0.119355716, -0.004582609, 0.050158955, 0.03867934, -0.13543603, -0.0011923639, -0.06866172));
	target += mul(e3, min16float4x4(0.11586327, -0.047302328, 0.062475067, 0.018575871, 0.12420718, -0.03602303, 0.021922488, 0.16011192, -0.16549775, 0.123044305, 0.065160766, -0.30708137, 0.07341779, -0.12929793, 0.08692529, 0.0007729847));
	target += mul(f3, min16float4x4(-0.013340411, 0.058056828, -0.028747091, -0.0020311237, -0.1070798, 0.13726988, 0.017587787, -0.06898856, 0.03802266, 0.13165978, -0.035371024, 0.098588474, -0.036178526, -0.1068027, -0.03172579, 0.0816444));
	target += mul(g3, min16float4x4(0.025470722, -0.010980958, -0.08286821, -0.031260632, -0.0134636145, 0.041295316, -0.09980376, 0.07899825, 0.046056226, 0.17291167, -0.066611394, 0.03685817, -0.020917175, 0.11551815, -0.016370535, -0.003991822));
	target += mul(h3, min16float4x4(-0.039056864, 0.011015572, 0.014014594, -0.08614736, -0.08130745, 0.045282196, -0.04879853, -0.07139807, 0.09670427, -0.07834781, -0.022022815, 0.053423326, -0.055300128, 0.23542596, -0.11442394, -0.05190056));
	target += mul(i3, min16float4x4(0.12978806, -0.020104066, -0.032463916, -0.04754379, 0.05811374, 0.029061198, -0.013163837, 0.051058855, 0.04294865, -0.12551701, 0.17822845, -0.16549106, 0.12024249, -0.0790749, 0.035424378, 0.0062358896));
	target += mul(na1, min16float4x4(0.030824278, 0.06636776, -0.047206167, 0.02480193, 0.071935624, -0.18845995, -0.028480597, -0.10213147, -0.03973547, 0.025171004, 0.016600806, -0.10615915, -0.07395773, -0.050147526, -0.011541545, -0.027081985));
	target += mul(nb1, min16float4x4(-0.035749037, -0.052818663, -0.020621216, -0.023525307, -0.02461827, 0.3019646, -0.024478583, -0.1398278, 0.17499511, 0.22476715, -0.13090259, -0.05484457, -0.023759075, 0.002843161, 0.014099166, -0.011660793));
	target += mul(nc1, min16float4x4(-0.008461302, 0.14787683, 0.07476249, -0.035538696, 0.007945418, 0.04992842, -0.2388183, 0.0061813896, 0.016805701, 0.019992555, 0.034271393, -0.040170603, -0.039961495, 0.009210595, 0.07606321, 0.05323195));
	target += mul(nd1, min16float4x4(-0.017007355, -0.01304119, -0.011782462, 0.043480955, 0.041575707, 0.20513225, -0.16858323, 0.019438695, -0.02795952, -0.032667078, 0.08400571, 0.012488913, -0.025382128, 0.06756553, 0.14349163, -0.012960532));
	target += mul(ne1, min16float4x4(-0.015847925, 0.035881996, 0.09946923, -0.2583748, -0.11036338, 0.02174868, 0.023047017, -0.023119839, 0.0014623358, -0.05400468, 0.1088209, 0.056070726, 0.09849772, 0.106276534, -0.2869582, 0.122843154));
	target += mul(nf1, min16float4x4(0.120457835, 0.0030220735, 0.011593652, 0.04870485, 0.051817082, -0.12444271, -0.0030080245, 0.03186695, -0.119991936, -0.03661239, 0.0462927, 0.047734156, 0.035473768, -0.050326344, 0.048162602, 0.0044394233));
	target += mul(ng1, min16float4x4(0.004526382, -0.040592365, 0.038592715, 0.06312635, -0.012543924, -0.03860053, 0.013131243, -0.11894808, -0.05983815, -0.09653036, 0.14409515, -0.022803063, 0.02864931, 0.014170389, 0.091406494, 0.08613508));
	target += mul(nh1, min16float4x4(0.12344745, -0.034350697, 0.10549495, -0.11843059, -0.041916244, -0.035728436, -0.052881684, -0.07620879, 0.06760638, -0.039527662, -0.006650022, -0.05049626, 0.12109734, -0.005554175, 0.17754045, -0.098896034));
	target += mul(ni1, min16float4x4(0.017840233, -0.0118570635, -0.080244206, -0.14309776, -0.03778345, 0.12812364, -0.011180574, -0.03749929, -0.013458457, 0.028993722, 0.03479446, -0.11635739, -0.01636896, -0.010422004, -0.022923285, 0.013722603));
	target += mul(na2, min16float4x4(0.0022784397, -0.026745517, 0.07457438, -0.023941608, -0.056146793, -0.012885049, 0.010106243, -0.13570426, -0.055139925, -0.0553148, 0.037558038, -0.015558114, 0.055840485, -0.08124391, -0.013017814, 0.18931141));
	target += mul(nb2, min16float4x4(0.10672792, 0.129464, 0.1233261, -0.062469885, -0.08835128, 0.17588028, -0.02560139, -0.07349341, -0.08052734, 0.03086464, 0.12930822, 0.107045054, 0.03136081, -0.11335949, 0.09541032, -0.015009924));
	target += mul(nc2, min16float4x4(0.023294786, -0.17904189, -0.036457974, -0.060965557, 0.088545635, 0.001061151, -0.016771115, 0.082081355, -0.0030623788, -0.05096391, 0.022067994, -0.078540295, -0.12912196, -0.045786213, 0.05568379, -0.16344398));
	target += mul(nd2, min16float4x4(0.043200932, 0.006267473, -0.081682056, 0.044593308, 0.03179784, 0.20806344, -0.038468197, 0.06644582, 0.01704569, -0.029287282, -0.0036700617, 0.018897371, -0.075105995, 0.09612947, -0.06442493, 0.012179776));
	target += mul(ne2, min16float4x4(-0.21926114, 0.18097721, -0.037700515, 0.016763914, -0.057943042, -0.06129067, 0.04456528, -0.2304425, 0.013301696, 0.11028081, -0.18095498, 0.14712757, 0.2271199, -0.3185643, -0.19932592, -0.08554962));
	target += mul(nf2, min16float4x4(0.0117652705, -0.041661818, 0.029219367, -0.046232816, 0.047820047, 0.068789035, -0.113418594, 0.1141295, -0.027060978, 0.07267708, 0.093252845, -0.049717877, -0.087836266, 0.14460698, 0.10277318, -0.04977497));
	target += mul(ng2, min16float4x4(0.022564596, -0.037228584, -0.065915406, -0.011077084, 0.030235467, -0.04677627, -0.06419004, -0.018991074, 0.034164365, -0.019168181, 0.022525655, -0.029373096, -0.079060145, 0.13279332, -0.08545939, -0.045388315));
	target += mul(nh2, min16float4x4(-0.14000517, -0.08309406, 0.13520917, -0.10369978, -0.016325317, 0.00970006, -0.048059512, 0.1412818, 0.040955327, 0.030759163, -0.108052924, 0.005294165, -0.10046129, 0.16592641, -0.035368618, -0.29051507));
	target += mul(ni2, min16float4x4(-0.09455044, 0.0005962807, 0.0006215668, -0.038142636, -0.03929331, -0.01591621, 0.0056410446, -0.036902174, -0.056509133, -0.10841171, 0.07702632, -0.08160013, 0.040747657, -0.08348532, 0.019081287, 0.020851197));
	target += mul(na3, min16float4x4(-0.03399592, 0.10141488, -0.0077629937, -0.17129703, -0.025233645, 0.052428465, -0.019579021, -0.072962284, 0.022322712, -0.18443614, -0.00848578, 0.0376278, 0.055581484, 0.06439001, -0.026564457, 0.015072123));
	target += mul(nb3, min16float4x4(0.11295866, -0.1541795, 0.11074539, -0.12757398, -0.11353885, 0.12023232, -0.07913168, 0.25957996, -0.0064171744, 0.08077023, 0.09673833, 0.008732368, 0.03630595, 0.059769, 0.028521406, 0.029331883));
	target += mul(nc3, min16float4x4(-0.081345834, -0.06722959, -0.13713932, 0.03613845, -0.084334835, 0.046838246, -0.004890033, -0.08524675, 0.15460378, -0.09410546, -0.058240023, 0.11844812, 0.00092362246, 0.028734036, 0.0028451593, 0.03558664));
	target += mul(nd3, min16float4x4(0.067000724, 0.08689177, 0.003695697, 0.08341895, -0.08124141, -0.20499983, 0.09505712, -0.07436812, -0.028131844, 0.050506454, -0.107579716, 0.058785282, 0.031196257, 0.021408495, -0.100359544, 0.07999305));
	target += mul(ne3, min16float4x4(-0.16514844, 0.117525734, 0.24123909, 0.09518423, 0.17757961, -0.28094006, 0.081966326, 0.0802129, 0.0011662474, 0.06366135, 0.07578068, -0.08616794, 0.19857462, -0.10196374, -0.13831666, -0.18653043));
	target += mul(nf3, min16float4x4(-0.06649859, 0.0935902, -0.19097336, 0.16118656, 0.2938468, -0.10315292, 0.08256489, -0.06169784, -0.05889727, -0.018046174, -0.17596339, 0.20343648, -0.08962845, -0.027532624, 0.059598826, -0.14278376));
	target += mul(ng3, min16float4x4(-0.0070921015, -0.07634683, -0.066166356, -0.06432544, 0.050059035, 0.20213397, -0.071587585, 0.031234715, 0.10629024, 0.044645656, -0.023101477, -0.022136679, 0.009119783, -0.10172394, 0.024746796, -0.1161207));
	target += mul(nh3, min16float4x4(-0.046572298, -0.06981039, 0.08314394, 0.043344617, 0.1914716, 0.0046652057, -0.0683364, 0.086023554, 0.06213587, -0.0077511827, -0.03336288, 0.1474879, -0.032717533, 0.078666836, -0.001740435, 0.048321523));
	target += mul(ni3, min16float4x4(0.18346673, -0.20763724, 0.05431475, -0.08291483, -0.0073792376, -0.053458065, 0.08561732, -0.103502, -0.06856406, 0.05193988, -0.009717332, 0.06446446, 0.050632656, 0.013681985, -0.02556495, 0.05056843));
	target += min16float4(-0.01824226, 0.05140684, 0.010533643, 0.017739987);
	conv1ups[gxy] = target;

	target = mul(a1, min16float4x4(0.070670135, -0.026429666, 0.09446684, -0.04920855, -0.08720965, -0.022478819, -0.15962029, 0.29240617, -0.10499224, 0.10415364, 0.11922523, -0.08293139, 0.07846739, -0.15612845, -0.19753109, -0.033664245));
	target += mul(b1, min16float4x4(-0.10003188, 0.088794544, -0.028137686, -0.1375475, 0.079632774, -0.012540568, -0.092962824, 0.10438857, -0.12865996, -0.040098958, -0.030862473, 0.009116932, -0.14513193, 0.13843827, -0.14862274, 0.27156416));
	target += mul(c1, min16float4x4(0.03148634, -0.13966283, -0.022684515, 0.080294125, -0.013548243, -0.01112399, 0.021930493, -0.24562296, -0.029252343, -0.0053704586, 0.088651545, -0.10468119, 0.0077052945, 0.027455118, -0.008439029, -0.08633876));
	target += mul(d1, min16float4x4(-0.073491044, 0.11097277, -0.02937573, 0.045977436, -0.015563786, 0.04763272, -0.17349051, 0.02479734, 0.12201058, -0.09606755, -0.064500526, -0.068423286, -0.10828311, 0.0025430934, 0.060595006, 0.10702606));
	target += mul(e1, min16float4x4(-0.10012673, -0.026187293, 0.039673958, 0.25377232, 0.16539277, 0.015475691, -0.017826023, -0.037547242, 0.27426562, 0.039105, -0.29495236, -0.20741108, 0.3893781, -0.00018520994, 0.18736628, 0.016120607));
	target += mul(f1, min16float4x4(-0.0902328, -0.035078812, 0.0423949, 0.10428684, -0.012309703, -0.0022217801, 0.12843162, 0.008824024, 0.10457806, -0.13958204, 0.042961385, -0.17798209, 0.13051195, -0.2078117, 0.014258071, 0.27743495));
	target += mul(g1, min16float4x4(0.0037268966, -0.002057136, -0.086700045, -0.04034686, -0.039582066, -0.05536445, -0.013854305, 0.13898304, 0.08383669, -0.1389377, 0.09724791, 0.27256468, 0.0012985421, 0.026786802, -0.09553305, -0.08505046));
	target += mul(h1, min16float4x4(0.047094945, -0.15165734, -0.16622189, 0.27696493, 0.04804586, 0.017589863, -0.048407666, -0.1423487, -0.18051605, -0.037678123, -0.083375834, 0.21356659, 0.056051373, 0.058305956, 0.020808164, 0.20114677));
	target += mul(i1, min16float4x4(-0.06873173, 0.056631878, -0.09389161, -0.026553899, -0.005246827, 0.011163956, 0.0807366, 0.018891184, 0.037806395, -0.08414753, -0.29572666, 0.12225136, 0.028108165, -0.12746434, -0.1242189, 0.06427617));
	target += mul(a2, min16float4x4(-0.054436807, 0.0463667, -0.3160585, -0.26496625, -0.0016307884, 0.0027304688, 0.13524249, 0.14023106, 0.15203272, -0.0055950717, -0.047067486, -0.1299749, -0.023347244, -0.011924935, 0.04708069, 0.14064));
	target += mul(b2, min16float4x4(-0.15567084, -0.03462954, 0.014766895, 0.28104082, -0.015955932, 0.048590813, 0.14149605, 0.016979203, 0.15654798, -0.124170296, -0.000571697, 0.18732761, -0.15969957, 0.036891263, -0.08222836, 0.007162299));
	target += mul(c2, min16float4x4(-0.027358167, -0.05515796, -0.21783291, -0.061588667, 0.14288566, 0.034540724, -0.0779948, -0.004935965, 0.087642424, -0.03457867, 0.26657468, -0.08798545, 0.06278833, 0.01650169, -0.15035287, 0.043133624));
	target += mul(d2, min16float4x4(0.05577383, 0.058146708, 0.0057744626, -0.043521628, 0.14279243, -0.22507532, 0.0896487, -0.03373711, -0.29882178, 0.12674153, 0.21856095, -0.03654502, 0.09770278, 0.011492664, 0.01397184, 0.11037485));
	target += mul(e2, min16float4x4(0.14057921, -0.18916433, -0.10062621, -0.19464967, -0.19286343, -0.08279728, 0.0062218676, -0.15246014, 0.0960211, -0.3964747, -0.016336296, 0.028859172, -0.047788087, 0.032031618, 0.054299697, -0.11431765));
	target += mul(f2, min16float4x4(-0.15350376, 0.1362609, -0.011803502, 0.2660655, -0.037387744, 0.18536955, -0.0015025261, -0.011900626, -0.023042146, -0.15995252, 0.060023192, 0.08954088, 0.07074839, 0.059100557, -0.08593189, -0.045180846));
	target += mul(g2, min16float4x4(-0.031948235, 0.07176401, -0.007034352, -0.12552954, 0.049458012, -0.07971771, 0.0093457, -0.10731874, 0.07024961, 0.27386668, 0.07679444, -0.28798524, -0.06428793, -0.0057761013, 0.014161652, -0.0065095956));
	target += mul(h2, min16float4x4(-0.1427731, 0.0833077, 0.13927783, 0.016691789, -0.16832228, 0.10298729, 0.1446675, -0.2656778, 0.0788247, 0.13420862, 0.050337754, -0.08008961, 0.07605825, 0.04659439, -0.054331373, 0.074493684));
	target += mul(i2, min16float4x4(0.07614274, -0.050090652, -0.066727035, 0.055715825, -0.07636078, 0.08155946, -0.061731443, -0.022193443, 0.057011697, -0.009381379, 0.176684, -0.05981099, -0.04690691, 0.051825907, -0.019666756, 0.0017494732));
	target += mul(a3, min16float4x4(0.12878093, -0.091072194, 0.03426444, -0.0014821129, 0.04648442, -0.056241687, 0.12965083, -0.2177644, 0.03271057, 0.013664906, -0.27382636, 0.009116637, -0.020398485, 0.026515692, 0.0059792865, -0.10869647));
	target += mul(b3, min16float4x4(0.017064014, 0.012380988, 0.015886486, 0.041969348, -0.056818817, 0.057386417, -0.19103225, 0.02042478, 0.022307403, -0.16955635, -0.25923833, -0.19144051, 0.044084065, 0.09931404, 0.08665806, -0.17140177));
	target += mul(c3, min16float4x4(-0.034919903, -0.00735085, -0.0040107057, 0.013110185, 0.008756165, -0.11104751, -0.03863784, 0.20081028, 0.008359515, 0.056265604, 0.0035791632, 0.14127707, 0.008306366, -0.061028276, -0.01180833, 0.11239347));
	target += mul(d3, min16float4x4(-0.055210557, -0.0047766017, -0.040911432, 0.04214669, 0.015301695, 0.035733294, -0.09534393, 0.3189227, -0.043539703, 0.10847848, 0.052175194, 0.25319937, -0.075755194, 0.07450996, -0.2392008, 0.17029741));
	target += mul(e3, min16float4x4(0.008697264, -0.062783785, 0.23503996, 0.06680282, -0.10700762, -0.05921618, 0.12575574, 0.12539467, 0.21779932, -0.27365687, -0.08419621, -0.23255387, -0.097952545, -0.33015022, -0.27839977, 0.54275817));
	target += mul(f3, min16float4x4(0.043178167, -0.07644931, -0.002126049, -0.0041748723, 0.12747553, 0.05624526, 0.08894693, 0.1273868, 0.13564228, -0.029284991, -0.1010155, 0.0144336475, -0.067769796, 0.12993337, 0.23458317, -0.1404509));
	target += mul(g3, min16float4x4(0.037086505, 0.04712714, 0.00080463936, 0.026554452, -0.032055024, -0.0346718, 0.14792679, 0.025423491, 0.045839246, 0.040022433, -0.010968567, -0.03638554, 0.03469138, -0.048995998, -0.080627054, -0.15703341));
	target += mul(h3, min16float4x4(0.0022719046, -0.11156194, -0.1660571, 0.07095863, 0.06325309, 0.03638195, 0.011129683, -0.16795434, 0.05859281, -0.050576515, 0.025492875, 0.14741158, 0.16042823, -0.021238782, -0.10693587, 0.062508605));
	target += mul(i3, min16float4x4(0.04699144, -0.06268154, -0.032550193, 0.1368816, -0.046266492, -0.09626834, 0.035877157, -0.017621659, -0.025884021, 0.016501589, -0.033517126, -0.16266182, 0.0063534426, -0.034565207, 0.107733876, -0.19080792));
	target += mul(na1, min16float4x4(-0.01089889, -0.046437796, -0.2864276, -0.059123863, 0.010273228, 0.035363402, -0.18365921, 0.002496715, 0.010531512, -0.044639286, -0.14159343, -0.04712995, 0.031355694, 0.041651487, 0.04172989, -0.072659165));
	target += mul(nb1, min16float4x4(-0.29903612, 0.016968794, 0.2026591, 0.14354537, 0.210121, -0.1271222, 0.11928214, 0.075612746, 0.07222206, -0.113600664, -0.031380497, -0.04970697, -0.040690526, -0.024844045, -0.14514743, 0.10170265));
	target += mul(nc1, min16float4x4(0.00901007, -0.0077540767, -0.16780637, -0.0772044, -0.08349278, 0.035623573, -0.0036132522, -0.1559422, 0.079474956, -0.024358552, 0.05147624, -0.095216155, -0.001963766, 0.026185913, 0.041633602, -0.068779185));
	target += mul(nd1, min16float4x4(0.11536367, 0.06698426, -0.019352471, -0.027348887, 0.12543406, -0.017715944, -0.22333942, -0.07524913, -0.023550004, 0.09020137, 0.15082505, -0.019156344, 0.014714152, -0.100751296, -0.10988814, 0.013269792));
	target += mul(ne1, min16float4x4(0.23938964, -0.015321653, -0.085038215, -0.21858668, -0.15793826, -0.1725926, 0.16878416, -0.15579711, -0.21086636, -0.023652412, -0.10312092, 0.047774162, 0.11063097, 0.02804365, -0.049057744, -0.20330532));
	target += mul(nf1, min16float4x4(0.058630574, 0.10365072, -0.112122595, -0.10462442, -0.04204145, 0.0060419035, -0.038622607, -0.22971797, -0.081746876, 0.110261, -0.03279762, 0.10083948, -0.07525642, 0.096350044, -0.15403591, 0.01831559));
	target += mul(ng1, min16float4x4(-0.013126955, 0.11560779, 0.06401061, -0.014257845, -0.078378044, 0.07452937, 0.030035159, 0.07133207, -0.072352365, -0.049404953, -0.2006817, -0.04745451, -0.0645119, 0.0849615, 0.053003483, 0.07766129));
	target += mul(nh1, min16float4x4(0.07683494, -0.47826648, 0.05708172, 0.12041683, 0.18084203, -0.08476069, 0.093064874, 0.016264802, 0.06801874, -0.01283242, -0.13347803, -0.035351828, -0.0011718989, -0.12699558, -0.0240836, -0.08060763));
	target += mul(ni1, min16float4x4(0.0521042, -0.062541164, 0.05483789, 0.14211908, 0.08606814, 0.06433033, -0.23270494, 0.05307593, 0.09299324, 0.04586578, -0.1193637, 0.12056507, -0.06442679, 0.06762315, -0.010547303, 0.031680685));
	target += mul(na2, min16float4x4(-0.09215318, -0.115724616, -0.061507307, 0.08273653, 0.0265886, -0.092683844, -0.22037667, -0.023114366, 0.028223295, -0.029118685, -0.088996224, 0.1023557, -0.089898214, 0.15436162, 0.16985597, 0.1431367));
	target += mul(nb2, min16float4x4(0.10560199, 0.13460231, 0.024534458, 0.1370791, 0.16920403, 0.013769043, -0.004941373, -0.22188903, -0.1193022, 0.07823969, -0.097713776, 0.044269208, 0.036816355, -0.11568587, -0.07947363, 0.022213666));
	target += mul(nc2, min16float4x4(0.002128253, 0.014331295, 0.09004623, -0.12958615, 0.0048723617, -0.072075516, 0.024190098, 0.011900665, 0.038696863, 0.07110043, -0.10347002, 0.082676366, 0.017796163, 0.004747536, 0.11188511, -0.21652836));
	target += mul(nd2, min16float4x4(-0.051317807, 0.13453357, 0.05310306, -0.033790052, -0.06231268, 0.11130248, -0.075370945, 0.2774124, 0.04305133, -0.045057327, -0.04373203, -0.10055409, 0.042824138, -0.021799369, -0.08762204, -0.16729161));
	target += mul(ne2, min16float4x4(0.08727262, -0.074025065, -0.113067836, -0.07882044, 0.04476854, -0.14519121, -0.0434838, -0.010525559, -0.0425304, 0.106957085, -0.28644025, -0.105096966, 0.12650728, -0.15108573, 0.013723224, 0.5163331));
	target += mul(nf2, min16float4x4(-0.021519013, -0.05317946, 0.0036545463, 0.0003156711, 0.12984163, -0.11362556, 0.061670557, -0.030158816, 0.04674806, 0.16352096, -0.23135264, 0.074876174, 0.0047455966, -0.120593436, 0.032926966, -0.20865184));
	target += mul(ng2, min16float4x4(-0.029197322, -0.09204084, -0.13026133, -0.020570219, 0.043402288, -0.016610064, 0.08961119, -0.09460752, -0.057213686, -0.14044005, 0.080606215, 0.12573113, 0.094055034, 0.06523493, -0.16264567, -0.0716556));
	target += mul(nh2, min16float4x4(-0.020557933, 0.077145614, 0.04620034, 0.22271551, 0.114781894, 0.11590448, -0.03233266, 0.13224865, -0.054499403, -0.01435028, -0.09684464, 0.022300925, 0.16768926, -0.019053463, 0.08804071, -0.14398381));
	target += mul(ni2, min16float4x4(0.0025323853, -0.016476262, 0.12608051, 0.016324151, -0.0035798363, 0.020308342, 0.06474364, -0.042083416, -0.08742628, 0.016960703, -0.120870225, 0.07373239, -0.06463355, -0.018745359, -0.02229239, -0.1039809));
	target += mul(na3, min16float4x4(-0.008440462, -0.15268475, -0.09420959, -0.07718843, 0.35601637, -0.0010803771, 0.050411247, -0.09859693, -0.008227993, 0.06407621, -0.19121973, -0.15547852, -0.033705134, 0.023920614, -0.12611681, 0.021967601));
	target += mul(nb3, min16float4x4(-0.24474435, 0.07716706, -0.24876165, -0.18184067, -0.020811914, 0.07414089, -0.21809489, 0.015727887, 0.12278457, -0.08471355, -0.06071567, -0.07017344, -0.064291485, -0.07627711, 0.076017715, 0.2072293));
	target += mul(nc3, min16float4x4(0.013676314, -0.04966636, 0.06895822, 0.15210962, 0.07330876, -0.034188077, -0.0173066, 0.11160374, -0.12326202, -0.002551885, 0.0015338673, 0.1079974, 0.03733164, 0.077835836, -0.07733004, -0.0058571417));
	target += mul(nd3, min16float4x4(-0.1854433, 0.02924247, -0.14843488, 0.18941449, -0.17652206, -0.13730201, -0.29041716, -0.12161381, -0.04599312, 0.16662349, 0.045855995, -0.005569671, -0.050993398, 0.019462017, -0.10552683, -0.19930908));
	target += mul(ne3, min16float4x4(0.08246259, 0.2602547, 0.16599776, -0.12149122, -0.048151806, 0.12042248, -0.16163243, 0.00087805535, 0.0536958, 0.05350576, 0.08406917, -0.060227945, 0.19056156, -0.2276745, -0.13755281, 0.39423308));
	target += mul(nf3, min16float4x4(-0.0775391, 0.105803244, 0.08474868, -0.019011196, 0.026801828, -0.036453005, -0.018443616, -0.03005072, -0.10748735, 0.080679856, 0.07718584, 0.07871323, 0.030023575, 0.022230582, -0.090973295, -0.1363233));
	target += mul(ng3, min16float4x4(-0.14770739, -0.09530047, 0.10400556, -0.115337685, 0.14459239, 0.1432794, -0.070606485, -0.053847175, 0.09378594, -0.09445331, 0.088633865, 0.071158156, 0.04437499, -0.04694172, -0.059354205, -0.00041449978));
	target += mul(nh3, min16float4x4(0.016041227, -0.2313572, -0.011389983, 0.030348316, 0.07260269, 0.009828401, -0.06116872, 0.026138552, -0.15607156, 0.042709354, 0.079162516, -0.16348995, -0.019872159, 0.13251646, 0.020712351, -0.16324571));
	target += mul(ni3, min16float4x4(-0.08813695, 0.093021385, 0.019460218, 0.096429825, -0.010391231, 0.0216966, -0.1490125, -0.04100963, -0.024641959, 0.044109546, 0.08043847, -0.03676336, -0.026315603, 0.025947884, -0.10771212, 0.0010732685));
	target += min16float4(0.003290131, -0.0154397, 0.04528908, -0.04218369);
	conv1ups1[gxy] = target;
}

//!PASS 8
//!DESC Conv-3x3x3x16
//!IN INPUT, conv1ups, conv1ups1
//!OUT OUTPUT
//!BLOCK_SIZE 8
//!NUM_THREADS 64

// Final pass: 3x3 convolution over the two 4-channel feature maps conv1ups and
// conv1ups1, producing an RGB value that is added to a sample of INPUT and
// written to OUTPUT with alpha = 1.
//
// blockStart: origin of the block handled by this thread group.
// threadId:   thread index; threadId.x is remapped to a 2D offset by Rmp8x8.
void Pass8(uint2 blockStart, uint3 threadId) {
	const uint2 pixel = blockStart + Rmp8x8(threadId.x);

	// Threads that land outside the output have nothing to write.
	const uint2 dims = GetOutputSize();
	if (!(pixel.x < dims.x && pixel.y < dims.y)) {
		return;
	}

	const float2 texel = GetOutputPt();
	const float2 center = (pixel + 0.5f) * texel;

	// Tap layout, one output texel apart (drawn with -y at the top):
	// [ NW, N, NE ]
	// [ W,  C, E  ]
	// [ SW, S, SE ]
	// The same eight neighbour coordinates are used for both feature maps.
	const float2 uvNW = center - texel;
	const float2 uvW = center + float2(-texel.x, 0);
	const float2 uvSW = center + float2(-texel.x, texel.y);
	const float2 uvN = center + float2(0, -texel.y);
	const float2 uvS = center + float2(0, texel.y);
	const float2 uvNE = center + float2(texel.x, -texel.y);
	const float2 uvE = center + float2(texel.x, 0);
	const float2 uvSE = center + texel;

	// Raw taps from the first feature map.
	const min16float4 rawNW1 = conv1ups.SampleLevel(sam, uvNW, 0);
	const min16float4 rawW1 = conv1ups.SampleLevel(sam, uvW, 0);
	const min16float4 rawSW1 = conv1ups.SampleLevel(sam, uvSW, 0);
	const min16float4 rawN1 = conv1ups.SampleLevel(sam, uvN, 0);
	const min16float4 rawC1 = conv1ups.SampleLevel(sam, center, 0);
	const min16float4 rawS1 = conv1ups.SampleLevel(sam, uvS, 0);
	const min16float4 rawNE1 = conv1ups.SampleLevel(sam, uvNE, 0);
	const min16float4 rawE1 = conv1ups.SampleLevel(sam, uvE, 0);
	const min16float4 rawSE1 = conv1ups.SampleLevel(sam, uvSE, 0);

	// Raw taps from the second feature map.
	const min16float4 rawNW2 = conv1ups1.SampleLevel(sam, uvNW, 0);
	const min16float4 rawW2 = conv1ups1.SampleLevel(sam, uvW, 0);
	const min16float4 rawSW2 = conv1ups1.SampleLevel(sam, uvSW, 0);
	const min16float4 rawN2 = conv1ups1.SampleLevel(sam, uvN, 0);
	const min16float4 rawC2 = conv1ups1.SampleLevel(sam, center, 0);
	const min16float4 rawS2 = conv1ups1.SampleLevel(sam, uvS, 0);
	const min16float4 rawNE2 = conv1ups1.SampleLevel(sam, uvNE, 0);
	const min16float4 rawE2 = conv1ups1.SampleLevel(sam, uvE, 0);
	const min16float4 rawSE2 = conv1ups1.SampleLevel(sam, uvSE, 0);

	// Each tap is split into its positive part max(x, 0) and its negated
	// negative part max(-x, 0); the two halves get separate weight matrices.
	const min16float4 posNW1 = max(rawNW1, 0);
	const min16float4 posW1 = max(rawW1, 0);
	const min16float4 posSW1 = max(rawSW1, 0);
	const min16float4 posN1 = max(rawN1, 0);
	const min16float4 posC1 = max(rawC1, 0);
	const min16float4 posS1 = max(rawS1, 0);
	const min16float4 posNE1 = max(rawNE1, 0);
	const min16float4 posE1 = max(rawE1, 0);
	const min16float4 posSE1 = max(rawSE1, 0);

	const min16float4 posNW2 = max(rawNW2, 0);
	const min16float4 posW2 = max(rawW2, 0);
	const min16float4 posSW2 = max(rawSW2, 0);
	const min16float4 posN2 = max(rawN2, 0);
	const min16float4 posC2 = max(rawC2, 0);
	const min16float4 posS2 = max(rawS2, 0);
	const min16float4 posNE2 = max(rawNE2, 0);
	const min16float4 posE2 = max(rawE2, 0);
	const min16float4 posSE2 = max(rawSE2, 0);

	const min16float4 negNW1 = max(-rawNW1, 0);
	const min16float4 negW1 = max(-rawW1, 0);
	const min16float4 negSW1 = max(-rawSW1, 0);
	const min16float4 negN1 = max(-rawN1, 0);
	const min16float4 negC1 = max(-rawC1, 0);
	const min16float4 negS1 = max(-rawS1, 0);
	const min16float4 negNE1 = max(-rawNE1, 0);
	const min16float4 negE1 = max(-rawE1, 0);
	const min16float4 negSE1 = max(-rawSE1, 0);

	const min16float4 negNW2 = max(-rawNW2, 0);
	const min16float4 negW2 = max(-rawW2, 0);
	const min16float4 negSW2 = max(-rawSW2, 0);
	const min16float4 negN2 = max(-rawN2, 0);
	const min16float4 negC2 = max(-rawC2, 0);
	const min16float4 negS2 = max(-rawS2, 0);
	const min16float4 negNE2 = max(-rawNE2, 0);
	const min16float4 negE2 = max(-rawE2, 0);
	const min16float4 negSE2 = max(-rawSE2, 0);

	// Accumulate in a fixed order (positive map 1, positive map 2, negative
	// map 1, negative map 2, then bias) so reduced-precision rounding is stable.
	min16float3 acc = mul(posNW1, min16float4x3(-0.009692998, -0.008524317, 0.0010432196, 0.00057165127, -0.011818117, 0.0014487396, 0.0049518407, -0.001888361, -0.013262905, 0.05004511, 0.023134997, -0.016969386));
	acc += mul(posW1, min16float4x3(0.008501838, -0.001176035, -0.0035942376, 0.009015378, 0.011752493, 0.0061198603, -0.056669727, -0.035067406, -0.040517025, -0.039194923, 0.007251104, -0.0124227265));
	acc += mul(posSW1, min16float4x3(0.010942934, 0.0100984, 0.0133265015, -0.019482462, -0.014820488, -0.021098822, -0.02860967, -0.10633767, -0.03296336, -0.011277147, -0.007915212, 0.008589044));
	acc += mul(posN1, min16float4x3(-0.004447993, -0.0019008318, 0.0054705385, -0.008042658, -0.0007432871, -0.0091506895, 0.010537624, 0.047716837, 0.01504048, -0.108882375, -0.06776622, -0.04354868));
	acc += mul(posC1, min16float4x3(-0.0030183722, 0.007729766, -0.007144855, 0.029383881, 0.024865916, 0.028182652, 0.16122057, 0.16675095, 0.18204775, 0.12284804, 0.031072017, 0.042543165));
	acc += mul(posS1, min16float4x3(0.0012941018, -0.00043673834, 0.009252594, 0.009156994, 0.0138289975, 0.015774839, -0.051840767, -0.07687406, -0.069361895, 0.017338578, 0.022834148, -0.0025963243));
	acc += mul(posNE1, min16float4x3(0.01646397, 0.0028061832, 0.007990534, -0.0073729097, -0.011168949, -0.0024975399, -0.0066431006, -0.014508122, -0.005740217, -0.06746655, -0.02083968, -0.05371696));
	acc += mul(posE1, min16float4x3(-0.013606154, 0.0062064505, 0.008410423, 0.0038487792, 0.012054022, 0.007878108, 0.034913104, -0.008084116, 0.014990575, -0.005912989, 0.021872269, 0.055241022));
	acc += mul(posSE1, min16float4x3(0.014251287, 0.0016604483, -0.006772879, 0.0028646574, 0.0015996173, -0.002210879, -0.0323296, 0.015729006, -0.017242312, -0.03718726, -0.03889927, -0.041001298));
	acc += mul(posNW2, min16float4x3(0.007536155, 0.009848646, 0.007846354, 0.019176869, 0.019928271, 0.031777207, 0.026086887, 0.01971131, -0.017595863, 0.012899679, 0.0026994154, 0.008934449));
	acc += mul(posW2, min16float4x3(0.017639438, 0.01536491, 0.011161806, 0.034244597, 0.025257796, 0.031185368, -0.18240982, 0.038758054, 0.13050976, -0.0075258785, -0.0034674285, 0.008525112));
	acc += mul(posSW2, min16float4x3(0.01788933, 0.017623115, 0.020215526, 0.0045994874, -0.0031487814, 0.003752946, -0.06494309, -0.07747321, 0.06544584, -0.004555707, -0.001776991, -0.017493976));
	acc += mul(posN2, min16float4x3(0.01359033, 0.02045422, 0.008234278, -0.008073938, -0.036093507, -0.0027978886, -0.37033105, 0.009709281, 0.28951523, 0.003258166, 0.0044517294, -0.003740991));
	acc += mul(posC2, min16float4x3(-0.036449786, -0.03035285, -0.025356997, 0.097153045, 0.10745537, 0.08421458, 0.043944303, -0.004867672, -0.15142196, 0.007044417, -0.00785739, 0.007504869));
	acc += mul(posS2, min16float4x3(-0.007951127, -0.008863303, -0.012213915, 0.007273406, 0.00944796, -0.002621692, 0.2919848, 0.06830943, -0.16119143, -0.0033908382, 0.007383878, 0.007847461));
	acc += mul(posNE2, min16float4x3(0.011670784, 0.00805604, 0.013980011, -0.032067183, -0.045659855, -0.03957935, 0.14678614, 0.014678316, -0.11203954, -0.002894618, 0.008089503, 0.0056759617));
	acc += mul(posE2, min16float4x3(0.008941132, -0.008732514, -0.004122878, -0.01872218, 0.0058594598, -0.014218105, 0.15922345, -0.00061763515, -0.10605325, 0.0059564817, 0.0062196897, -0.0031137357));
	acc += mul(posSE2, min16float4x3(-0.027044835, -0.0113663385, -0.018061407, -0.01064461, 0.0004394501, 0.0068360637, 0.12218274, -0.025980305, 0.060082816, 0.002298275, -0.005121948, -0.0018933173));
	acc += mul(negNW1, min16float4x3(-0.014044151, -0.0055593867, -0.0091519095, 0.018282808, -0.054974634, -0.02104256, 0.004737865, 0.009833153, 0.0050819647, 0.009256364, 0.004517343, -0.0012567915));
	acc += mul(negW1, min16float4x3(0.035084303, 0.019331766, -0.006399992, -0.08042094, -0.14020248, -0.13438301, -0.0014871466, -0.0071605383, -0.0070841024, 0.001705956, -0.010914731, -0.0022737188));
	acc += mul(negSW1, min16float4x3(-0.024562238, -0.025555398, 0.00043982622, 0.04687896, 0.062265635, 0.06194832, 0.016357735, 0.0056735775, 0.01868422, 0.0035063815, 0.0050708377, 0.009102912));
	acc += mul(negN1, min16float4x3(0.024276884, 0.031309772, 0.053946678, 0.027081756, 0.023922514, 0.051302873, -0.005081098, -0.013981954, -0.007141123, -0.017242068, -0.00036468913, 0.0071311933));
	acc += mul(negC1, min16float4x3(0.096000426, 0.12978247, 0.089689955, 0.03013154, 0.09065384, 0.010782777, -0.009774296, -0.010487119, -0.018002238, 0.027585275, 0.018800229, 0.007482455));
	acc += mul(negS1, min16float4x3(-0.031725004, -0.05638542, -0.06471826, -0.038512804, -0.036520924, -0.026658544, 0.0019714478, 0.004168433, 0.0036675548, 0.009312959, -0.009726487, 0.003937418));
	acc += mul(negNE1, min16float4x3(0.008056586, -0.03609238, -0.0035044104, -0.0052967947, 0.010446542, 0.010737699, -0.00941154, -0.005599727, -0.0071648047, 0.0028106347, 0.0063315486, 0.0005620387));
	acc += mul(negE1, min16float4x3(-0.10104362, -0.06228799, -0.057575073, -0.0008651546, -0.010849562, -0.0066441186, -0.016244762, -0.0053532585, -0.012414173, -0.012507298, 0.005470365, 0.0032063425));
	acc += mul(negSE1, min16float4x3(-0.019126823, -0.022827078, -0.01918732, -0.0049576303, -0.010899637, -0.01990915, 0.019013962, 0.007385637, 0.015615745, 0.025586424, 0.02317941, 0.019631773));
	acc += mul(negNW2, min16float4x3(-0.011578009, -0.0037521352, -0.0044622095, -0.0022668878, 0.0022691146, -0.00570573, 0.0052153515, 0.005547525, 0.0033032992, 0.009927488, -0.0061824876, -0.016856432));
	acc += mul(negW2, min16float4x3(-0.07627339, -0.0595728, -0.08247348, -0.016201988, -0.019643232, -0.021891698, -0.0033560628, 0.0056153075, 0.005510208, -0.0061155884, 0.004726241, 0.03613314));
	acc += mul(negSW2, min16float4x3(-0.026918657, -0.017315133, -0.021586075, -0.021625597, -0.008547036, -0.011233614, -0.0047514364, -0.0029167454, -0.00583421, 0.012949899, 0.0035817428, -0.0045735473));
	acc += mul(negN2, min16float4x3(-0.08581085, -0.07063111, -0.06381294, -0.0040735947, -0.012934923, -0.0057904166, -0.0077691195, -0.00034605907, 0.0023017807, -0.00029635165, -0.042357627, -0.057994146));
	acc += mul(negC2, min16float4x3(0.05193261, 0.047533646, 0.071092665, -0.015042884, -0.023481138, -0.020945435, 0.008216166, 0.004034294, 0.0030410702, 0.10532969, 0.13052966, 0.11042539));
	acc += mul(negS2, min16float4x3(0.052652936, 0.045103617, 0.036393207, 0.0018712351, -0.009865708, -0.00591473, -0.0008652197, 7.966737e-05, -0.004292879, -0.013765752, -0.0603564, 0.032057546));
	acc += mul(negNE2, min16float4x3(0.0020095943, -0.014555452, -0.008721001, 0.00085926603, -0.0012287357, 0.007974135, 0.004697991, -1.4738258e-05, -0.0048043244, 0.047545042, 0.099660076, 0.09649951));
	acc += mul(negE2, min16float4x3(0.024352267, 0.03303334, 0.02903438, 0.0062978864, 0.014672455, 0.0043003284, -0.0017531263, -0.0032476797, 0.001345206, -0.20736417, -0.1745426, -0.32957983));
	acc += mul(negSE2, min16float4x3(0.027512033, 0.029760962, 0.033007182, 9.0356014e-05, 0.0061743665, 0.0036443318, -0.016802983, -0.019364875, -0.014311061, 0.021530075, 0.059616566, 0.07120056));
	acc += min16float3(-0.0007544955, -0.0007692414, 0.00032997545);

	// The convolution result is a residual on top of INPUT sampled through sam1.
	const float3 base = INPUT.SampleLevel(sam1, center, 0).rgb;
	OUTPUT[pixel] = float4(acc + base, 1);
}
